Skip to content

Commit

Permalink
Remove redundant comments from transform notebooks
Browse files · Browse the repository at this point in the history
Authored and committed by MattPCollins on Jul 25, 2024
1 parent 9180063 commit 0977805
Show file tree
Hide file tree
Showing 3 changed files with 0 additions and 64 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,26 +29,6 @@

# COMMAND ----------

# payload = {
# "CuratedStorageAccessKey": "cumulusframeworkdevcuratedaccesskey",
# "CuratedStorageName": "cumulusframeworkdev",
# "CuratedContainerName": "curated",
# "CleansedStorageAccessKey": "cumulusframeworkdevcleansedaccesskey",
# "CleansedStorageName": "cumulusframeworkdev",
# "CleansedContainerName": "cleansed",
# "SchemaName": "Dimensions",
# "DatasetName": "GoldTable1",
# "ColumnsList": "AddressId,FullAddress",
# "ColumnTypeList": "INTEGER,STRING",
# "BkAttributesList": "AddressId",
# "PartitionByAttributesList": "",
# "SurrogateKey": "GoldTable1Id",
# "LoadType": "F",
# "BusinessLogicNotebookPath": "./businesslogicnotebooks/BespokeNotebook",
# }

# COMMAND ----------

cleansedSecret, cleansedStorageName, cleansedContainerName, curatedSecret, curatedStorageName, curatedContainerName, curatedSchemaName, curatedDatasetName, columnsList, columnTypeList, bkList, partitionList, surrogateKey, loadType, businessLogicNotebookPath = getTransformPayloadVariables(payload)

# COMMAND ----------
Expand Down
21 changes: 0 additions & 21 deletions src/azure.databricks/python/notebooks/transform/CreateFactTable.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,27 +27,6 @@
import json
payload = json.loads(dbutils.widgets.get("Notebook Payload"))

# COMMAND ----------

# payload = {
# "CuratedStorageAccessKey": "cumulusframeworkdevcuratedaccesskey",
# "CuratedStorageName": "cumulusframeworkdev",
# "CuratedContainerName": "curated",
# "CleansedStorageAccessKey": "cumulusframeworkdevcleansedaccesskey",
# "CleansedStorageName": "cumulusframeworkdev",
# "CleansedContainerName": "cleansed",
# "SchemaName": "Facts",
# "DatasetName": "GoldAgg1",
# "ColumnsList": "AddressId,FullAddress",
# "ColumnTypeList": "INTEGER,STRING",
# "CuratedPkList": "AddressId",
# "CuratedPkListPartitionFields":"",
# "SurrogateKey": "GoldAgg1Id",
# "LoadType": "F",
# "BusinessLogicNotebookPath": "./businesslogicnotebooks/BespokeNotebook",
# }


# COMMAND ----------

cleansedSecret, cleansedStorageName, cleansedContainerName, curatedSecret, curatedStorageName, curatedContainerName, curatedSchemaName, curatedDatasetName, columnsList, columnTypeList, bkList, partitionList, surrogateKey, loadType, businessLogicNotebookPath = getTransformPayloadVariables(payload)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,29 +71,6 @@

# COMMAND ----------

# DataFrame Validation:
# Check Columns in metadata match those in DataFrame
# Check DF result is non-zero before overwriting
# ADVISORY: Check for unadvised column types (e.g. STRING)
# ADVISORY: Check aggregations exist
# ADVISORY: Check for partitionby fields being used if data size is expected < 1TB

# COMMAND ----------

# # check schemas match
# if sourceDf.schema == targetDf.schema:
# print("Target vs Source Schema Validation Successful")
# elif sourceDf.schema != targetDf.schema:
# raise Exception("Target vs Source Schema Validation Unsuccessful")
# elif type(sourceDf) != "pyspark.sql.dataframe.DataFrame":
# raise TypeError("Error in Source DataFrame provided.")
# elif type(targetDf) != "pyspark.sql.dataframe.DataFrame":
# raise TypeError("Error in Target DataFrame provided.")
# else:
# raise Exception("Unexpected state. Please review.")

# COMMAND ----------

output = {}

# COMMAND ----------
Expand Down

0 comments on commit 0977805

Please sign in to comment.