Skip to content

Commit

Permalink
feat: adds ExternalCatalogDatasetOptions and tests (#2111)
Browse files Browse the repository at this point in the history
* feat: adds ExternalCatalogDatasetOptions and tests

* Update google/cloud/bigquery/dataset.py

Co-authored-by: Tim Sweña (Swast) <swast@google.com>

* Update google/cloud/bigquery/dataset.py

Co-authored-by: Tim Sweña (Swast) <swast@google.com>

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Tim Sweña (Swast) <swast@google.com>
Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored Jan 15, 2025
1 parent 55ca63c commit b929a90
Show file tree
Hide file tree
Showing 4 changed files with 273 additions and 1 deletion.
25 changes: 25 additions & 0 deletions google/cloud/bigquery/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from google.cloud.bigquery.routine import Routine, RoutineReference
from google.cloud.bigquery.table import Table, TableReference
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
from google.cloud.bigquery import external_config

from typing import Optional, List, Dict, Any, Union

Expand Down Expand Up @@ -531,6 +532,7 @@ class Dataset(object):
"max_time_travel_hours": "maxTimeTravelHours",
"default_rounding_mode": "defaultRoundingMode",
"resource_tags": "resourceTags",
"external_catalog_dataset_options": "externalCatalogDatasetOptions",
}

def __init__(self, dataset_ref) -> None:
Expand Down Expand Up @@ -898,6 +900,29 @@ def storage_billing_model(self, value):
)
self._properties["storageBillingModel"] = value

@property
def external_catalog_dataset_options(self):
    """Options for an open source compatible dataset in the BigQuery catalog.

    Describes the open source database, schema or namespace that this
    dataset represents.

    Returns:
        The ``externalCatalogDatasetOptions`` entry of this dataset's
        properties wrapped in an
        ``external_config.ExternalCatalogDatasetOptions`` (built with
        ``from_api_repr``), or ``None`` when that entry is absent or null.
    """
    raw_options = _helpers._get_sub_prop(
        self._properties, ["externalCatalogDatasetOptions"]
    )

    # Nothing stored under the API field: there is nothing to wrap.
    if raw_options is None:
        return None

    return external_config.ExternalCatalogDatasetOptions.from_api_repr(raw_options)

@external_catalog_dataset_options.setter
def external_catalog_dataset_options(self, value):
    # The value is checked by ``_helpers._isinstance_or_raise``: it must be an
    # ``ExternalCatalogDatasetOptions`` instance, and ``None`` is permitted.
    checked = _helpers._isinstance_or_raise(
        value, external_config.ExternalCatalogDatasetOptions, none_allowed=True
    )

    # Store the API representation (or an explicit None) under the API field
    # name registered for this property.
    payload = None if checked is None else checked.to_api_repr()
    self._properties[
        self._PROPERTY_TO_API_FIELD["external_catalog_dataset_options"]
    ] = payload

@classmethod
def from_string(cls, full_dataset_id: str) -> "Dataset":
"""Construct a dataset from fully-qualified dataset ID.
Expand Down
76 changes: 75 additions & 1 deletion google/cloud/bigquery/external_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
Job.configuration.query.tableDefinitions.
"""

from __future__ import absolute_import
from __future__ import absolute_import, annotations

import base64
import copy
Expand All @@ -28,6 +28,7 @@
from google.cloud.bigquery._helpers import _bytes_to_json
from google.cloud.bigquery._helpers import _int_or_none
from google.cloud.bigquery._helpers import _str_or_none
from google.cloud.bigquery import _helpers
from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions
from google.cloud.bigquery.schema import SchemaField

Expand Down Expand Up @@ -1003,3 +1004,76 @@ def from_api_repr(cls, resource: dict) -> "ExternalConfig":
config = cls(resource["sourceFormat"])
config._properties = copy.deepcopy(resource)
return config


class ExternalCatalogDatasetOptions:
    """Open source compatible dataset options stored in the BigQuery catalog.

    Holds the metadata of the open source database, schema or namespace
    represented by the current dataset.

    Args:
        default_storage_location_uri (Optional[str]): Storage location URI
            for all tables in the dataset. Equivalent to hive metastore's
            database locationUri. Maximum length of 1024 characters.
        parameters (Optional[dict[str, Any]]): Key value pairs defining the
            parameters and properties of the open source schema. Maximum
            size of 2Mib.
    """

    def __init__(
        self,
        default_storage_location_uri: Optional[str] = None,
        parameters: Optional[Dict[str, Any]] = None,
    ):
        # Backing store keyed by API field name. Both entries are written
        # through the property setters so the arguments get type-checked.
        self._properties: Dict[str, Any] = {}
        self.default_storage_location_uri = default_storage_location_uri
        self.parameters = parameters

    @property
    def default_storage_location_uri(self) -> Optional[str]:
        """Optional. Storage location URI for all tables in the dataset.

        Equivalent to hive metastore's database locationUri. Maximum length
        of 1024 characters. Read from the ``defaultStorageLocationUri`` entry;
        ``None`` when that entry is missing.
        """
        return self._properties.get("defaultStorageLocationUri")

    @default_storage_location_uri.setter
    def default_storage_location_uri(self, value: Optional[str]):
        # Only ``str`` or ``None`` pass the helper's check.
        self._properties["defaultStorageLocationUri"] = _helpers._isinstance_or_raise(
            value, str, none_allowed=True
        )

    @property
    def parameters(self) -> Optional[Dict[str, Any]]:
        """Optional. Parameters and properties of the open source schema.

        A map of key value pairs. Maximum size of 2Mib. Read from the
        ``parameters`` entry; ``None`` when that entry is missing.
        """
        return self._properties.get("parameters")

    @parameters.setter
    def parameters(self, value: Optional[Dict[str, Any]]):
        # Only ``dict`` or ``None`` pass the helper's check.
        self._properties["parameters"] = _helpers._isinstance_or_raise(
            value, dict, none_allowed=True
        )

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API. This is
                the object's own backing dictionary, not a copy.
        """
        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> ExternalCatalogDatasetOptions:
        """Factory: construct an instance of ``cls`` from its API representation.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated. The
                dictionary is adopted by reference rather than copied.

        Returns:
            An instance of the class backed by ``api_repr``.
        """
        options = cls()
        options._properties = api_repr
        return options
84 changes: 84 additions & 0 deletions tests/unit/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,16 @@ class TestDataset(unittest.TestCase):
DS_ID = "dataset-id"
DS_REF = DatasetReference(PROJECT, DS_ID)
KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1"
DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path"
PARAMETERS = {"key": "value"}
API_REPR = {
"datasetReference": {"projectId": "project", "datasetId": "dataset-id"},
"labels": {},
"externalCatalogDatasetOptions": {
"defaultStorageLocationUri": DEFAULT_STORAGE_LOCATION_URI,
"parameters": PARAMETERS,
},
}

@staticmethod
def _get_target_class():
Expand Down Expand Up @@ -1067,6 +1077,80 @@ def test___repr__(self):
expected = "Dataset(DatasetReference('project1', 'dataset1'))"
self.assertEqual(repr(dataset), expected)

def test_external_catalog_dataset_options_setter(self):
    # An ExternalCatalogDatasetOptions built from DEFAULT_STORAGE_LOCATION_URI
    # and PARAMETERS is assigned to a fresh dataset; the dataset must then
    # serialize to exactly API_REPR.

    from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions

    options = ExternalCatalogDatasetOptions(
        default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI,
        parameters=self.PARAMETERS,
    )
    dataset = self._make_one(self.DS_REF)
    dataset.external_catalog_dataset_options = options

    assert dataset.to_api_repr() == self.API_REPR

def test_external_catalog_dataset_options_getter_prop_exists(self):
    # After assigning an ExternalCatalogDatasetOptions to a default dataset,
    # reading the external_catalog_dataset_options attribute back must give
    # an object with the same api_repr as the one that was assigned.

    from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions

    options = ExternalCatalogDatasetOptions(
        default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI,
        parameters=self.PARAMETERS,
    )
    dataset = self._make_one(self.DS_REF)
    dataset.external_catalog_dataset_options = options

    round_tripped = dataset.external_catalog_dataset_options
    assert round_tripped.to_api_repr() == options.to_api_repr()

def test_external_catalog_dataset_options_getter_prop_is_none(self):
    # A dataset created from only a reference has no external catalog
    # options, so the getter must return None. An identity check is used
    # because an equality check could be satisfied by an object whose __eq__
    # happens to compare equal to None.

    dataset = self._make_one(self.DS_REF)
    assert dataset.external_catalog_dataset_options is None

def test_external_catalog_dataset_options_from_api_repr(self):
    # A dataset built with from_api_repr from API_REPR must expose, through
    # external_catalog_dataset_options, an object whose api_repr equals the
    # "externalCatalogDatasetOptions" entry of the input.

    dataset = self._get_target_class().from_api_repr(self.API_REPR)
    options = dataset.external_catalog_dataset_options

    assert options.to_api_repr() == self.API_REPR["externalCatalogDatasetOptions"]

def test_external_catalog_dataset_options_to_api_repr(self):
    # Round trip: build a dataset from API_REPR, serialize it again with
    # to_api_repr(), and confirm the "externalCatalogDatasetOptions" entry
    # comes back equal to the one that went in.

    field = "externalCatalogDatasetOptions"
    dataset = self._get_target_class().from_api_repr(self.API_REPR)

    assert dataset.to_api_repr()[field] == self.API_REPR[field]


class TestDatasetListItem(unittest.TestCase):
@staticmethod
Expand Down
89 changes: 89 additions & 0 deletions tests/unit/test_external_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from google.cloud.bigquery import external_config
from google.cloud.bigquery import schema

import pytest


class TestExternalConfig(unittest.TestCase):
SOURCE_URIS = ["gs://foo", "gs://bar"]
Expand Down Expand Up @@ -890,3 +892,90 @@ def _copy_and_update(d, u):
d = copy.deepcopy(d)
d.update(u)
return d


class TestExternalCatalogDatasetOptions:
    """Unit tests for external_config.ExternalCatalogDatasetOptions."""

    @staticmethod
    def _get_target_class():
        from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions

        return ExternalCatalogDatasetOptions

    def _make_one(self, *args, **kw):
        return self._get_target_class()(*args, **kw)

    DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path"
    PARAMETERS = {"key": "value"}

    @pytest.mark.parametrize(
        "default_storage_location_uri,parameters",
        [
            (DEFAULT_STORAGE_LOCATION_URI, PARAMETERS),  # set all params
            (DEFAULT_STORAGE_LOCATION_URI, None),  # set only one argument at a time
            (None, PARAMETERS),
            (None, None),  # use default parameters
        ],
    )
    def test_ctor_initialization(
        self,
        default_storage_location_uri,
        parameters,
    ):
        """Test the constructor with explicit, partial and default values."""

        instance = self._make_one(
            default_storage_location_uri=default_storage_location_uri,
            parameters=parameters,
        )

        assert instance.default_storage_location_uri == default_storage_location_uri
        assert instance.parameters == parameters

    @pytest.mark.parametrize(
        "default_storage_location_uri,parameters",
        [
            (123, None),  # does not accept integers
            (None, 123),
        ],
    )
    def test_ctor_invalid_input(self, default_storage_location_uri, parameters):
        """Test ExternalCatalogDatasetOptions constructor with invalid input."""

        with pytest.raises(TypeError) as e:
            self._make_one(
                default_storage_location_uri=default_storage_location_uri,
                parameters=parameters,
            )

        # Looking for the first word from the string "Pass <variable> as..."
        assert "Pass " in str(e.value)

    def test_to_api_repr(self):
        """Test ExternalCatalogDatasetOptions.to_api_repr method."""

        instance = self._make_one(
            default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI,
            parameters=self.PARAMETERS,
        )
        resource = instance.to_api_repr()
        assert (
            resource["defaultStorageLocationUri"] == self.DEFAULT_STORAGE_LOCATION_URI
        )
        assert resource["parameters"] == self.PARAMETERS

    def test_from_api_repr(self):
        """GIVEN an api representation of an ExternalCatalogDatasetOptions object (i.e. api_repr)
        WHEN converted into an ExternalCatalogDatasetOptions object using from_api_repr()
        THEN the result is an ExternalCatalogDatasetOptions whose properties
        equal the api_repr it was built from.
        """

        api_repr = {
            "defaultStorageLocationUri": self.DEFAULT_STORAGE_LOCATION_URI,
            "parameters": self.PARAMETERS,
        }
        # from_api_repr is a classmethod factory: call it on the class itself
        # rather than through a throwaway instance.
        result = self._get_target_class().from_api_repr(api_repr)

        assert isinstance(result, external_config.ExternalCatalogDatasetOptions)
        assert result._properties == api_repr

0 comments on commit b929a90

Please sign in to comment.