Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adds ExternalCatalogDatasetOptions and tests #2111

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions google/cloud/bigquery/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from google.cloud.bigquery.routine import Routine, RoutineReference
from google.cloud.bigquery.table import Table, TableReference
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
from google.cloud.bigquery import external_config

from typing import Optional, List, Dict, Any, Union

Expand Down Expand Up @@ -531,6 +532,7 @@ class Dataset(object):
"max_time_travel_hours": "maxTimeTravelHours",
"default_rounding_mode": "defaultRoundingMode",
"resource_tags": "resourceTags",
"external_catalog_dataset_options": "externalCatalogDatasetOptions",
}

def __init__(self, dataset_ref) -> None:
Expand Down Expand Up @@ -898,6 +900,29 @@ def storage_billing_model(self, value):
)
self._properties["storageBillingModel"] = value

@property
def external_catalog_dataset_options(self):
    """Options describing an open source compatible dataset in the BigQuery
    catalog: the metadata of the open source database, schema or namespace
    that this dataset represents.

    Returns:
        An ``external_config.ExternalCatalogDatasetOptions`` built from the
        stored ``externalCatalogDatasetOptions`` resource, or ``None`` when
        that property is unset.
    """
    resource = _helpers._get_sub_prop(
        self._properties, ["externalCatalogDatasetOptions"]
    )

    # Nothing stored (or stored as None): surface that as-is.
    if resource is None:
        return None

    # Wrap the raw API dict in the typed helper class.
    return external_config.ExternalCatalogDatasetOptions.from_api_repr(resource)

@external_catalog_dataset_options.setter
def external_catalog_dataset_options(self, value):
    """Store the given options (or ``None``) on the dataset resource.

    Args:
        value: An ``external_config.ExternalCatalogDatasetOptions`` instance,
            or ``None``. The value is checked with
            ``_helpers._isinstance_or_raise`` before being stored.
    """
    options = _helpers._isinstance_or_raise(
        value, external_config.ExternalCatalogDatasetOptions, none_allowed=True
    )

    # Persist the API-shaped dict rather than the wrapper object; None is
    # stored as an explicit None under the API field name.
    api_value = None if options is None else options.to_api_repr()
    api_field = self._PROPERTY_TO_API_FIELD["external_catalog_dataset_options"]
    self._properties[api_field] = api_value

@classmethod
def from_string(cls, full_dataset_id: str) -> "Dataset":
"""Construct a dataset from fully-qualified dataset ID.
Expand Down
76 changes: 75 additions & 1 deletion google/cloud/bigquery/external_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
Job.configuration.query.tableDefinitions.
"""

from __future__ import absolute_import
from __future__ import absolute_import, annotations

import base64
import copy
Expand All @@ -28,6 +28,7 @@
from google.cloud.bigquery._helpers import _bytes_to_json
from google.cloud.bigquery._helpers import _int_or_none
from google.cloud.bigquery._helpers import _str_or_none
from google.cloud.bigquery import _helpers
from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions
from google.cloud.bigquery.schema import SchemaField

Expand Down Expand Up @@ -1003,3 +1004,76 @@ def from_api_repr(cls, resource: dict) -> "ExternalConfig":
config = cls(resource["sourceFormat"])
config._properties = copy.deepcopy(resource)
return config


class ExternalCatalogDatasetOptions:
    """Describe an open source compatible dataset living in the BigQuery catalog.

    Holds the metadata of the open source database, schema or namespace that
    the current dataset represents.

    Args:
        default_storage_location_uri (Optional[str]): Storage location URI
            for all tables in the dataset. Equivalent to hive metastore's
            database locationUri. Maximum length of 1024 characters.
        parameters (Optional[dict[str, Any]]): Key/value pairs defining the
            parameters and properties of the open source schema. Maximum
            size of 2MiB.
    """

    def __init__(
        self,
        default_storage_location_uri: Optional[str] = None,
        parameters: Optional[Dict[str, Any]] = None,
    ):
        # API-shaped backing store; the property setters below write into it,
        # so both constructor arguments are type-checked on the way in.
        self._properties: Dict[str, Any] = {}
        self.default_storage_location_uri = default_storage_location_uri
        self.parameters = parameters

    @property
    def default_storage_location_uri(self) -> Optional[str]:
        """Optional. Storage location URI for all tables in the dataset.

        Equivalent to hive metastore's database locationUri. Maximum length
        of 1024 characters.
        """
        uri = self._properties.get("defaultStorageLocationUri")
        return uri

    @default_storage_location_uri.setter
    def default_storage_location_uri(self, value: Optional[str]):
        # Only ``str`` or ``None`` is accepted; the helper does the check.
        self._properties["defaultStorageLocationUri"] = _helpers._isinstance_or_raise(
            value, str, none_allowed=True
        )

    @property
    def parameters(self) -> Optional[Dict[str, Any]]:
        """Optional. Key/value pairs defining the parameters and properties
        of the open source schema. Maximum size of 2MiB.
        """
        params = self._properties.get("parameters")
        return params

    @parameters.setter
    def parameters(self, value: Optional[Dict[str, Any]]):
        # Only ``dict`` or ``None`` is accepted; the helper does the check.
        self._properties["parameters"] = _helpers._isinstance_or_raise(
            value, dict, none_allowed=True
        )

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API. This is
                the object's own backing dictionary, not a copy.
        """
        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> ExternalCatalogDatasetOptions:
        """Factory: construct an instance of the class (cls) from its API
        representation.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated. The
                dictionary is adopted as the instance's backing store
                without being copied.

        Returns:
            An instance of the class initialized with data from ``api_repr``.
        """
        instance = cls()
        instance._properties = api_repr
        return instance
84 changes: 84 additions & 0 deletions tests/unit/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,16 @@ class TestDataset(unittest.TestCase):
DS_ID = "dataset-id"
DS_REF = DatasetReference(PROJECT, DS_ID)
KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1"
# Fixture values shared by the external_catalog_dataset_options tests in this
# class.
DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path"
PARAMETERS = {"key": "value"}
# Dataset resource used both as the expected ``to_api_repr()`` output in the
# setter test and as the input to ``from_api_repr()`` in the round-trip tests.
API_REPR = {
"datasetReference": {"projectId": "project", "datasetId": "dataset-id"},
"labels": {},
"externalCatalogDatasetOptions": {
"defaultStorageLocationUri": DEFAULT_STORAGE_LOCATION_URI,
"parameters": PARAMETERS,
},
}

@staticmethod
def _get_target_class():
Expand Down Expand Up @@ -1067,6 +1077,80 @@ def test___repr__(self):
expected = "Dataset(DatasetReference('project1', 'dataset1'))"
self.assertEqual(repr(dataset), expected)

def test_external_catalog_dataset_options_setter(self):
    """Options assigned to a dataset show up in its API representation."""
    from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions

    # GIVEN options built from DEFAULT_STORAGE_LOCATION_URI and PARAMETERS
    options = ExternalCatalogDatasetOptions(
        default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI,
        parameters=self.PARAMETERS,
    )

    # WHEN they are attached to a fresh dataset
    dataset = self._make_one(self.DS_REF)
    dataset.external_catalog_dataset_options = options

    # THEN the dataset's API representation matches API_REPR
    assert dataset.to_api_repr() == self.API_REPR

def test_external_catalog_dataset_options_getter_prop_exists(self):
    """The getter returns options equivalent to the ones that were assigned."""
    from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions

    # GIVEN a default dataset PLUS an ExternalCatalogDatasetOptions
    options = ExternalCatalogDatasetOptions(
        default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI,
        parameters=self.PARAMETERS,
    )
    dataset = self._make_one(self.DS_REF)
    dataset.external_catalog_dataset_options = options

    # THEN the api_repr of the external_catalog_dataset_options attribute
    # matches the api_repr of the ExternalCatalogDatasetOptions assigned.
    round_tripped = dataset.external_catalog_dataset_options
    assert round_tripped.to_api_repr() == options.to_api_repr()

def test_external_catalog_dataset_options_getter_prop_is_none(self):
    """The getter returns None when no options were ever set."""
    # GIVEN only a default dataset
    dataset = self._make_one(self.DS_REF)

    # THEN external_catalog_dataset_options is None. Identity is checked
    # rather than equality so that an object whose __eq__ happens to accept
    # None cannot satisfy the assertion.
    assert dataset.external_catalog_dataset_options is None

def test_external_catalog_dataset_options_from_api_repr(self):
    """Options parsed via Dataset.from_api_repr match the source resource."""
    # GIVEN a dataset resource that includes an
    # "externalCatalogDatasetOptions" entry
    resource = self.API_REPR

    # WHEN a dataset object is created from it via from_api_repr
    dataset = self._get_target_class().from_api_repr(resource)

    # THEN the api_repr of the ExternalCatalogDatasetOptions on that dataset
    # matches the value under the "externalCatalogDatasetOptions" key.
    options = dataset.external_catalog_dataset_options
    assert options.to_api_repr() == resource["externalCatalogDatasetOptions"]

def test_external_catalog_dataset_options_to_api_repr(self):
    """Dataset.to_api_repr round-trips the externalCatalogDatasetOptions key."""
    # GIVEN a dataset api_repr including an ExternalCatalogDatasetOptions key
    resource = self.API_REPR
    dataset = self._get_target_class().from_api_repr(resource)

    # WHEN the dataset is serialized back with to_api_repr()
    serialized = dataset.to_api_repr()

    # THEN the value under that key matches the value used to create the
    # dataset object.
    key = "externalCatalogDatasetOptions"
    assert serialized[key] == resource[key]


class TestDatasetListItem(unittest.TestCase):
@staticmethod
Expand Down
89 changes: 89 additions & 0 deletions tests/unit/test_external_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from google.cloud.bigquery import external_config
from google.cloud.bigquery import schema

import pytest


class TestExternalConfig(unittest.TestCase):
SOURCE_URIS = ["gs://foo", "gs://bar"]
Expand Down Expand Up @@ -890,3 +892,90 @@ def _copy_and_update(d, u):
d = copy.deepcopy(d)
d.update(u)
return d


class TestExternalCatalogDatasetOptions:
    """Unit tests for ``external_config.ExternalCatalogDatasetOptions``."""

    # Defined before the parametrize decorators below, which reference these
    # names while the class body is being executed.
    DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path"
    PARAMETERS = {"key": "value"}

    @staticmethod
    def _get_target_class():
        """Return the class under test (imported lazily)."""
        from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions

        return ExternalCatalogDatasetOptions

    def _make_one(self, *args, **kw):
        """Instantiate the class under test with the given arguments."""
        return self._get_target_class()(*args, **kw)

    @pytest.mark.parametrize(
        "default_storage_location_uri,parameters",
        [
            (DEFAULT_STORAGE_LOCATION_URI, PARAMETERS),  # set all params
            (DEFAULT_STORAGE_LOCATION_URI, None),  # set only one argument at a time
            (None, PARAMETERS),
            (None, None),  # use default parameters
        ],
    )
    def test_ctor_initialization(
        self,
        default_storage_location_uri,
        parameters,
    ):
        """Constructor arguments, explicit or None, are readable back via the
        matching properties."""

        instance = self._make_one(
            default_storage_location_uri=default_storage_location_uri,
            parameters=parameters,
        )

        assert instance.default_storage_location_uri == default_storage_location_uri
        assert instance.parameters == parameters

    @pytest.mark.parametrize(
        "default_storage_location_uri,parameters",
        [
            (123, None),  # does not accept integers
            (None, 123),
        ],
    )
    def test_ctor_invalid_input(self, default_storage_location_uri, parameters):
        """Constructor rejects arguments of the wrong type with TypeError."""

        with pytest.raises(TypeError) as e:
            self._make_one(
                default_storage_location_uri=default_storage_location_uri,
                parameters=parameters,
            )

        # Checked after the ``with`` block, once the exception has been
        # captured. Looking for the first word from the string
        # "Pass <variable> as..."
        assert "Pass " in str(e.value)

    def test_to_api_repr(self):
        """to_api_repr exposes both values under their API field names."""

        instance = self._make_one(
            default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI,
            parameters=self.PARAMETERS,
        )
        resource = instance.to_api_repr()
        assert (
            resource["defaultStorageLocationUri"] == self.DEFAULT_STORAGE_LOCATION_URI
        )
        assert resource["parameters"] == self.PARAMETERS

    def test_from_api_repr(self):
        """GIVEN an api representation of an ExternalCatalogDatasetOptions object (i.e. api_repr)
        WHEN converted into an ExternalCatalogDatasetOptions object using from_api_repr()
        THEN the result is an ExternalCatalogDatasetOptions whose properties
        equal the api_repr it was built from.
        """

        api_repr = {
            "defaultStorageLocationUri": self.DEFAULT_STORAGE_LOCATION_URI,
            "parameters": self.PARAMETERS,
        }

        # from_api_repr is a classmethod factory, so call it on the class
        # itself instead of on a throwaway instance.
        result = self._get_target_class().from_api_repr(api_repr)

        assert isinstance(result, external_config.ExternalCatalogDatasetOptions)
        assert result._properties == api_repr
Loading