From b929a900d49e2c15897134209ed9de5fc7f238cd Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 15 Jan 2025 12:44:27 -0500 Subject: [PATCH] feat: adds ExternalCatalogDatasetOptions and tests (#2111) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: adds ExternalCatalogDatasetOptions and tests * Update google/cloud/bigquery/dataset.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/dataset.py Co-authored-by: Tim Sweña (Swast) * 🦉 Updates from OwlBot post-processor See /~https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Tim Sweña (Swast) Co-authored-by: Owl Bot --- google/cloud/bigquery/dataset.py | 25 +++++++ google/cloud/bigquery/external_config.py | 76 +++++++++++++++++++- tests/unit/test_dataset.py | 84 ++++++++++++++++++++++ tests/unit/test_external_config.py | 89 ++++++++++++++++++++++++ 4 files changed, 273 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 4d06d729d..15a11fb40 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -27,6 +27,7 @@ from google.cloud.bigquery.routine import Routine, RoutineReference from google.cloud.bigquery.table import Table, TableReference from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery import external_config from typing import Optional, List, Dict, Any, Union @@ -531,6 +532,7 @@ class Dataset(object): "max_time_travel_hours": "maxTimeTravelHours", "default_rounding_mode": "defaultRoundingMode", "resource_tags": "resourceTags", + "external_catalog_dataset_options": "externalCatalogDatasetOptions", } def __init__(self, dataset_ref) -> None: @@ -898,6 +900,29 @@ def storage_billing_model(self, value): ) self._properties["storageBillingModel"] = value + @property + def external_catalog_dataset_options(self): + """Options defining open source compatible datasets living in the + BigQuery catalog. Contains metadata of open source database, schema + or namespace represented by the current dataset.""" + + prop = _helpers._get_sub_prop( + self._properties, ["externalCatalogDatasetOptions"] + ) + + if prop is not None: + prop = external_config.ExternalCatalogDatasetOptions.from_api_repr(prop) + return prop + + @external_catalog_dataset_options.setter + def external_catalog_dataset_options(self, value): + value = _helpers._isinstance_or_raise( + value, external_config.ExternalCatalogDatasetOptions, none_allowed=True + ) + self._properties[ + self._PROPERTY_TO_API_FIELD["external_catalog_dataset_options"] + ] = (value.to_api_repr() if value is not None else None) + @classmethod def from_string(cls, full_dataset_id: str) -> "Dataset": """Construct a dataset from fully-qualified dataset ID. diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index a891bc232..7f2b58f2b 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -18,7 +18,7 @@ Job.configuration.query.tableDefinitions. """ -from __future__ import absolute_import +from __future__ import absolute_import, annotations import base64 import copy @@ -28,6 +28,7 @@ from google.cloud.bigquery._helpers import _bytes_to_json from google.cloud.bigquery._helpers import _int_or_none from google.cloud.bigquery._helpers import _str_or_none +from google.cloud.bigquery import _helpers from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions from google.cloud.bigquery.schema import SchemaField @@ -1003,3 +1004,76 @@ def from_api_repr(cls, resource: dict) -> "ExternalConfig": config = cls(resource["sourceFormat"]) config._properties = copy.deepcopy(resource) return config + + +class ExternalCatalogDatasetOptions: + """Options defining open source compatible datasets living in the BigQuery catalog. + Contains metadata of open source database, schema or namespace represented + by the current dataset. + + Args: + default_storage_location_uri (Optional[str]): The storage location URI for all + tables in the dataset. Equivalent to hive metastore's database + locationUri. Maximum length of 1024 characters. (str) + parameters (Optional[dict[str, Any]]): A map of key value pairs defining the parameters + and properties of the open source schema. Maximum size of 2Mib. + """ + + def __init__( + self, + default_storage_location_uri: Optional[str] = None, + parameters: Optional[Dict[str, Any]] = None, + ): + self._properties: Dict[str, Any] = {} + self.default_storage_location_uri = default_storage_location_uri + self.parameters = parameters + + @property + def default_storage_location_uri(self) -> Optional[str]: + """Optional. The storage location URI for all tables in the dataset. + Equivalent to hive metastore's database locationUri. Maximum length of + 1024 characters.""" + + return self._properties.get("defaultStorageLocationUri") + + @default_storage_location_uri.setter + def default_storage_location_uri(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["defaultStorageLocationUri"] = value + + @property + def parameters(self) -> Optional[Dict[str, Any]]: + """Optional. A map of key value pairs defining the parameters and + properties of the open source schema. Maximum size of 2Mib.""" + + return self._properties.get("parameters") + + @parameters.setter + def parameters(self, value: Optional[Dict[str, Any]]): + value = _helpers._isinstance_or_raise(value, dict, none_allowed=True) + self._properties["parameters"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + return self._properties + + @classmethod + def from_api_repr(cls, api_repr: dict) -> ExternalCatalogDatasetOptions: + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + api_repr (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'resource'. + """ + config = cls() + config._properties = api_repr + return config diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 46bcd6611..8ab8dffec 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -650,6 +650,16 @@ class TestDataset(unittest.TestCase): DS_ID = "dataset-id" DS_REF = DatasetReference(PROJECT, DS_ID) KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" + DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path" + PARAMETERS = {"key": "value"} + API_REPR = { + "datasetReference": {"projectId": "project", "datasetId": "dataset-id"}, + "labels": {}, + "externalCatalogDatasetOptions": { + "defaultStorageLocationUri": DEFAULT_STORAGE_LOCATION_URI, + "parameters": PARAMETERS, + }, + } @staticmethod def _get_target_class(): @@ -1067,6 +1077,80 @@ def test___repr__(self): expected = "Dataset(DatasetReference('project1', 'dataset1'))" self.assertEqual(repr(dataset), expected) + def test_external_catalog_dataset_options_setter(self): + # GIVEN the parameters DEFAULT_STORAGE_LOCATION_URI and PARAMETERS + # WHEN an ExternalCatalogDatasetOptions obj is created + # and added to a dataset. + # THEN the api representation of the dataset will match API_REPR + + from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + + dataset = self._make_one(self.DS_REF) + + ecdo_obj = ExternalCatalogDatasetOptions( + default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI, + parameters=self.PARAMETERS, + ) + dataset.external_catalog_dataset_options = ecdo_obj + + result = dataset.to_api_repr() + expected = self.API_REPR + assert result == expected + + def test_external_catalog_dataset_options_getter_prop_exists(self): + # GIVEN default dataset PLUS an ExternalCatalogDatasetOptions + # THEN confirm that the api_repr of the ExternalCatalogDatasetsOptions + # matches the api_repr of the external_catalog_dataset_options attribute. + + from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + + dataset = self._make_one(self.DS_REF) + ecdo_obj = ExternalCatalogDatasetOptions( + default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI, + parameters=self.PARAMETERS, + ) + dataset.external_catalog_dataset_options = ecdo_obj + result = dataset.external_catalog_dataset_options.to_api_repr() + expected = ecdo_obj.to_api_repr() + assert result == expected + + def test_external_catalog_dataset_options_getter_prop_is_none(self): + # GIVEN only a default dataset + # THEN confirm that external_catalog_dataset_options is None + + dataset = self._make_one(self.DS_REF) + expected = None + result = dataset.external_catalog_dataset_options + assert result == expected + + def test_external_catalog_dataset_options_from_api_repr(self): + # GIVEN default dataset including an ExternalCatalogDatasetOptions + # THEN confirm that the api_repr of the ExternalCatalogDatasetsOptions + # on a dataset object created via from_api_repr matches the api_repr + # of the "externalCatalogDatasetOptions" key. + + api_repr = self.API_REPR + klass = self._get_target_class() + dataset = klass.from_api_repr(api_repr) + + result = dataset.external_catalog_dataset_options.to_api_repr() + expected = api_repr["externalCatalogDatasetOptions"] + assert result == expected + + def test_external_catalog_dataset_options_to_api_repr(self): + # GIVEN a dataset api_repr including an ExternalCatalogDatasetOptions key + # THEN confirm that the api_repr of that key from a dataset object created + # via the to_api_repr() method matches the value of the key + # used to create the dataset object + + api_repr = self.API_REPR + klass = self._get_target_class() + dataset = klass.from_api_repr(api_repr) + + result = dataset.to_api_repr()["externalCatalogDatasetOptions"] + expected = api_repr["externalCatalogDatasetOptions"] + assert result == expected + class TestDatasetListItem(unittest.TestCase): @staticmethod diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 9fd16e699..0c27d8e56 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -19,6 +19,8 @@ from google.cloud.bigquery import external_config from google.cloud.bigquery import schema +import pytest + class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] @@ -890,3 +892,90 @@ def _copy_and_update(d, u): d = copy.deepcopy(d) d.update(u) return d + + +class TestExternalCatalogDatasetOptions: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + + return ExternalCatalogDatasetOptions + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path" + PARAMETERS = {"key": "value"} + + @pytest.mark.parametrize( + "default_storage_location_uri,parameters", + [ + (DEFAULT_STORAGE_LOCATION_URI, PARAMETERS), # set all params + (DEFAULT_STORAGE_LOCATION_URI, None), # set only one argument at a time + (None, PARAMETERS), + (None, None), # use default parameters + ], + ) + def test_ctor_initialization( + self, + default_storage_location_uri, + parameters, + ): + """Test ExternalCatalogDatasetOptions constructor with explicit values.""" + + instance = self._make_one( + default_storage_location_uri=default_storage_location_uri, + parameters=parameters, + ) + + assert instance.default_storage_location_uri == default_storage_location_uri + assert instance.parameters == parameters + + @pytest.mark.parametrize( + "default_storage_location_uri,parameters", + [ + (123, None), # does not accept integers + (None, 123), + ], + ) + def test_ctor_invalid_input(self, default_storage_location_uri, parameters): + """Test ExternalCatalogDatasetOptions constructor with invalid input.""" + + with pytest.raises(TypeError) as e: + self._make_one( + default_storage_location_uri=default_storage_location_uri, + parameters=parameters, + ) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + """Test ExternalCatalogDatasetOptions.to_api_repr method.""" + + instance = self._make_one( + default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI, + parameters=self.PARAMETERS, + ) + resource = instance.to_api_repr() + assert ( + resource["defaultStorageLocationUri"] == self.DEFAULT_STORAGE_LOCATION_URI + ) + assert resource["parameters"] == self.PARAMETERS + + def test_from_api_repr(self): + """GIVEN an api representation of an ExternalCatalogDatasetOptions object (i.e. api_repr) + WHEN converted into an ExternalCatalogDatasetOptions object using from_api_repr() + THEN it will have the representation in dict format as an ExternalCatalogDatasetOptions + object made directly (via _make_one()) and represented in dict format. + """ + + instance = self._make_one() + api_repr = { + "defaultStorageLocationUri": self.DEFAULT_STORAGE_LOCATION_URI, + "parameters": self.PARAMETERS, + } + result = instance.from_api_repr(api_repr) + + assert isinstance(result, external_config.ExternalCatalogDatasetOptions) + assert result._properties == api_repr