-
Notifications
You must be signed in to change notification settings - Fork 6.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
docs(samples): Add Dataflow to Pub/Sub snippet (#11104)
* docs(samples): Add Dataflow to Pub/Sub snippet Adds a snippet that shows how to write messages to Pub/Sub from Dataflow. * Fix region tag * Skip Python 3.12 test see apache/beam#29149 * Fix copyright date * Improve IT by reading messages from Pub/Sub * Incorporate review feedback * Fix testfor 3.8 * Fix linter error
- Loading branch information
1 parent
da40d4e
commit 93429e0
Showing
5 changed files
with
222 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# Copyright 2024 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# Default TEST_CONFIG_OVERRIDE for python repos. | ||
|
||
# You can copy this file into your directory, then it will be imported from | ||
# the noxfile.py. | ||
|
||
# The source of truth: | ||
# /~https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/noxfile_config.py | ||
|
||
TEST_CONFIG_OVERRIDE = { | ||
# You can opt out from the test for specific Python versions. | ||
"ignored_versions": ["2.7", "3.7", "3.9", "3.10", "3.12"], | ||
# Old samples are opted out of enforcing Python type hints | ||
# All new samples should feature them | ||
"enforce_type_hints": True, | ||
# An envvar key for determining the project id to use. Change it | ||
# to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a | ||
# build specific Cloud project. You can also use your own string | ||
# to use your own Cloud project. | ||
"gcloud_project_env": "GOOGLE_CLOUD_PROJECT", | ||
# 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', | ||
# If you need to use a specific version of pip, | ||
# change pip_version_override to the string representation | ||
# of the version number, for example, "20.2.4" | ||
"pip_version_override": None, | ||
# A dictionary you want to inject into your test. Don't put any | ||
# secrets here. These values will override predefined values. | ||
"envs": {}, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
# !/usr/bin/env python | ||
# Copyright 2024 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import os | ||
import time | ||
from unittest.mock import patch | ||
import uuid | ||
|
||
from google.cloud import pubsub_v1 | ||
|
||
import pytest | ||
|
||
from ..write_pubsub import write_to_pubsub | ||
|
||
|
||
topic_id = f'test-topic-{uuid.uuid4()}' | ||
subscription_id = f'{topic_id}-sub' | ||
project_id = os.environ["GOOGLE_CLOUD_PROJECT"] | ||
|
||
publisher = pubsub_v1.PublisherClient() | ||
subscriber = pubsub_v1.SubscriberClient() | ||
|
||
NUM_MESSAGES = 4 | ||
TIMEOUT = 60 * 5 | ||
|
||
|
||
@pytest.fixture(scope="function") | ||
def setup_and_teardown() -> None: | ||
topic_path = publisher.topic_path(project_id, topic_id) | ||
subscription_path = subscriber.subscription_path(project_id, subscription_id) | ||
|
||
try: | ||
publisher.create_topic(request={"name": topic_path}) | ||
subscriber.create_subscription( | ||
request={"name": subscription_path, "topic": topic_path} | ||
) | ||
yield | ||
finally: | ||
subscriber.delete_subscription( | ||
request={"subscription": subscription_path}) | ||
publisher.delete_topic(request={"topic": topic_path}) | ||
|
||
|
||
def read_messages() -> None: | ||
received_messages = [] | ||
ack_ids = [] | ||
|
||
# Read messages from Pub/Sub. It might be necessary to read multiple | ||
# batches, Use a timeout value to avoid potentially looping forever. | ||
start_time = time.time() | ||
while time.time() - start_time <= TIMEOUT: | ||
# Pull messages from Pub/Sub. | ||
subscription_path = subscriber.subscription_path(project_id, subscription_id) | ||
response = subscriber.pull( | ||
request={"subscription": subscription_path, "max_messages": NUM_MESSAGES} | ||
) | ||
received_messages.append(response.received_messages) | ||
|
||
for received_message in response.received_messages: | ||
ack_ids.append(received_message.ack_id) | ||
|
||
# Acknowledge the received messages so they will not be sent again. | ||
subscriber.acknowledge( | ||
request={"subscription": subscription_path, "ack_ids": ack_ids} | ||
) | ||
|
||
if (len(received_messages) >= NUM_MESSAGES): | ||
break | ||
|
||
time.sleep(5) | ||
|
||
return received_messages | ||
|
||
|
||
def test_write_to_pubsub(setup_and_teardown: None) -> None: | ||
with patch("sys.argv", [ | ||
"", '--streaming', f'--project={project_id}', f'--topic={topic_id}' | ||
]): | ||
write_to_pubsub() | ||
|
||
# Read from Pub/Sub to verify the pipeline successfully wrote messages. | ||
# Duplicate reads are possible. | ||
messages = read_messages() | ||
assert (len(messages) >= NUM_MESSAGES) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
#!/usr/bin/env python | ||
# Copyright 2024 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# [START dataflow_pubsub_write_with_attributes]i | ||
import argparse | ||
from typing import Any, Dict, List | ||
|
||
import apache_beam as beam | ||
from apache_beam.io import PubsubMessage | ||
from apache_beam.io import WriteToPubSub | ||
from apache_beam.options.pipeline_options import PipelineOptions | ||
|
||
from typing_extensions import Self | ||
|
||
|
||
def item_to_message(item: Dict[str, Any]) -> PubsubMessage: | ||
attributes = { | ||
'buyer': item['name'], | ||
'timestamp': str(item['ts']) | ||
} | ||
data = bytes(item['product'], 'utf-8') | ||
|
||
return PubsubMessage(data=data, attributes=attributes) | ||
|
||
|
||
def write_to_pubsub(argv: List[str] = None) -> None: | ||
|
||
# Parse the pipeline options passed into the application. Example: | ||
# --project=$PROJECT_ID --topic=$TOPIC_NAME --streaming | ||
# For more information, see | ||
# https://beam.apache.org/documentation/programming-guide/#configuring-pipeline-options | ||
class MyOptions(PipelineOptions): | ||
@classmethod | ||
# Define custom pipeline options that specify the project ID and Pub/Sub | ||
# topic. | ||
def _add_argparse_args(cls: Self, parser: argparse.ArgumentParser) -> None: | ||
parser.add_argument("--project", required=True) | ||
parser.add_argument("--topic", required=True) | ||
|
||
example_data = [ | ||
{'name': 'Robert', 'product': 'TV', 'ts': 1613141590000}, | ||
{'name': 'Maria', 'product': 'Phone', 'ts': 1612718280000}, | ||
{'name': 'Juan', 'product': 'Laptop', 'ts': 1611618000000}, | ||
{'name': 'Rebeca', 'product': 'Video game', 'ts': 1610000000000} | ||
] | ||
options = MyOptions() | ||
|
||
with beam.Pipeline(options=options) as pipeline: | ||
( | ||
pipeline | ||
| "Create elements" >> beam.Create(example_data) | ||
| "Convert to Pub/Sub messages" >> beam.Map(item_to_message) | ||
| WriteToPubSub( | ||
topic=f'projects/{options.project}/topics/{options.topic}', | ||
with_attributes=True) | ||
) | ||
|
||
print('Pipeline ran successfully.') | ||
# [END dataflow_pubsub_write_with_attributes] | ||
|
||
|
||
if __name__ == "__main__": | ||
write_to_pubsub() |