Merge pull request #89 from CloudFormations/develop

Add Development changes
CloudFormations · Aug 12, 2024 · b8aa841
2 parents 8996208 + 6f3c9a6
commit b8aa841
Show file tree

Hide file tree

Showing 7 changed files with 22 additions and 65 deletions.
diff --git a/src/azure.databricks/python/notebooks/transform/CreateDimensionTable.py b/src/azure.databricks/python/notebooks/transform/CreateDimensionTable.py
@@ -29,26 +29,6 @@
 
 # COMMAND ----------
 
-# payload = {
-#     "CuratedStorageAccessKey": "cumulusframeworkdevcuratedaccesskey",
-#     "CuratedStorageName": "cumulusframeworkdev", 
-#     "CuratedContainerName": "curated", 
-#     "CleansedStorageAccessKey": "cumulusframeworkdevcleansedaccesskey",
-#     "CleansedStorageName": "cumulusframeworkdev", 
-#     "CleansedContainerName": "cleansed", 
-#     "SchemaName": "Dimensions",
-#     "DatasetName": "GoldTable1",
-#     "ColumnsList": "AddressId,FullAddress",
-#     "ColumnTypeList": "INTEGER,STRING",
-#     "BkAttributesList": "AddressId",
-#     "PartitionByAttributesList": "",
-#     "SurrogateKey": "GoldTable1Id",
-#     "LoadType": "F",
-#     "BusinessLogicNotebookPath": "./businesslogicnotebooks/BespokeNotebook",
-# }
-
-# COMMAND ----------
-
 cleansedSecret, cleansedStorageName, cleansedContainerName, curatedSecret, curatedStorageName, curatedContainerName, curatedSchemaName, curatedDatasetName, columnsList, columnTypeList, bkList, partitionList, surrogateKey, loadType, businessLogicNotebookPath = getTransformPayloadVariables(payload)
 
 # COMMAND ----------

diff --git a/src/azure.databricks/python/notebooks/transform/CreateFactTable.py b/src/azure.databricks/python/notebooks/transform/CreateFactTable.py
@@ -27,27 +27,6 @@
 import json
 payload = json.loads(dbutils.widgets.get("Notebook Payload"))
 
-# COMMAND ----------
-
-# payload = {
-#     "CuratedStorageAccessKey": "cumulusframeworkdevcuratedaccesskey",
-#     "CuratedStorageName": "cumulusframeworkdev", 
-#     "CuratedContainerName": "curated", 
-#     "CleansedStorageAccessKey": "cumulusframeworkdevcleansedaccesskey",
-#     "CleansedStorageName": "cumulusframeworkdev", 
-#     "CleansedContainerName": "cleansed", 
-#     "SchemaName": "Facts",
-#     "DatasetName": "GoldAgg1",
-#     "ColumnsList": "AddressId,FullAddress",
-#     "ColumnTypeList": "INTEGER,STRING",
-#     "CuratedPkList": "AddressId",
-#     "CuratedPkListPartitionFields":"",
-#     "SurrogateKey": "GoldAgg1Id",
-#     "LoadType": "F",
-#     "BusinessLogicNotebookPath": "./businesslogicnotebooks/BespokeNotebook",
-# }
-
-
 # COMMAND ----------
 
 cleansedSecret, cleansedStorageName, cleansedContainerName, curatedSecret, curatedStorageName, curatedContainerName, curatedSchemaName, curatedDatasetName, columnsList, columnTypeList, bkList, partitionList, surrogateKey, loadType, businessLogicNotebookPath = getTransformPayloadVariables(payload)

diff --git a/src/azure.databricks/python/notebooks/transform/TransformExecution.py b/src/azure.databricks/python/notebooks/transform/TransformExecution.py
@@ -71,29 +71,6 @@
 
 # COMMAND ----------
 
-# DataFrame Validation: 
-# Check Columns in metadata match those in DataFrame
-# Check DF result is non-zero before overwriting
-# ADVISORY: Check for unadvised column types (e.g. STRING)
-# ADVISORY: Check aggregations exist
-# ADVISORY: Check for partitionby fields being used if data size is expected < 1TB
-
-# COMMAND ----------
-
-# # check schemas match
-# if sourceDf.schema == targetDf.schema:
-#     print("Target vs Source Schema Validation Successful")
-# elif sourceDf.schema != targetDf.schema:
-#     raise Exception("Target vs Source Schema Validation Unsuccessful")
-# elif type(sourceDf) != "pyspark.sql.dataframe.DataFrame":
-#     raise TypeError("Error in Source DataFrame provided.")
-# elif type(targetDf) != "pyspark.sql.dataframe.DataFrame":
-#     raise TypeError("Error in Target DataFrame provided.")
-# else:
-#     raise Exception("Unexpected state. Please review.")
-
-# COMMAND ----------
-
 output = {}
 
 # COMMAND ----------

diff --git a/src/metadata.ingest/ingest/Stored Procedures/AddIngestPayloadPipeline.sql b/src/metadata.ingest/ingest/Stored Procedures/AddIngestPayloadPipeline.sql
@@ -16,10 +16,12 @@ WHERE StageName = @StageName
 IF @StageCount = 0
 BEGIN
     RAISERROR('No rows returned. Please review the Stage Details provided and confirm this is enabled.',16,1)
+    RETURN 0;
 END
 IF @StageCount > 1
 BEGIN
     RAISERROR('Multiple rows returned. Please review the Stage Details provided.',16,1)
+    RETURN 0;
 END
 
 
@@ -42,10 +44,12 @@ AND ds.DatasetDisplayName = @DatasetDisplayName
 IF @DatasetCount = 0
 BEGIN
     RAISERROR('No rows returned. Please review the Dataset Id provided and confirm this is enabled.',16,1)
+    RETURN 0;
 END
 IF @DatasetCount > 1
 BEGIN
     RAISERROR('More than 1 row returned. Please review there is 1 active Dataset for the provided Dataset Id, and the connection details.',16,1)
+    RETURN 0;
 END
 
 -- Store all dataset ids associated with DatasetDisplayName

diff --git a/src/metadata.ingest/ingest/Stored Procedures/AddIngestPayloadPipelineDependencies.sql b/src/metadata.ingest/ingest/Stored Procedures/AddIngestPayloadPipelineDependencies.sql
@@ -17,10 +17,12 @@ AND ds.DatasetDisplayName = @DatasetDisplayName
 IF @DatasetCount = 0
 BEGIN
     RAISERROR('No rows returned. Please review the Dataset Id provided and confirm this is enabled.',16,1)
+    RETURN 0;
 END
 IF @DatasetCount > 1
 BEGIN
     RAISERROR('More than 1 row returned. Please review there is 1 active Dataset for the provided Dataset Id, and the connection details.',16,1)
+    RETURN 0;
 END
 
 SELECT 
@@ -71,14 +73,17 @@ SELECT @PipelineIdResult, @DependantPipelineIdResult
 IF @PipelineIdResult IS NULL AND @DependantPipelineIdResult IS NULL
 BEGIN 
 	RAISERROR('Missing Ids for this Dataset',16,1)
+    RETURN 0;
 END
 ELSE IF @PipelineIdResult IS NULL AND @DependantPipelineIdResult IS NOT NULL
 BEGIN 
 	RAISERROR('Missing PipelineId (Raw Ingest Pipeline)',16,1)
+    RETURN 0;
 END
 ELSE IF @PipelineIdResult IS NOT NULL AND @DependantPipelineIdResult IS NULL
 BEGIN 
 	RAISERROR('Missing DependantPipelineId (Cleansed Merge Pipeline)',16,1)
+    RETURN 0;
 END
 ELSE IF @PipelineIdResult IS NOT NULL AND @DependantPipelineIdResult IS NOT NULL
 BEGIN 
@@ -93,5 +98,6 @@ END
 ELSE 
 BEGIN
 	RAISERROR('Unexpected Error',16,1)
+    RETURN 0;
 END
 
diff --git a/src/metadata.ingest/ingest/Stored Procedures/GetDatasetPayload.sql b/src/metadata.ingest/ingest/Stored Procedures/GetDatasetPayload.sql
@@ -24,6 +24,7 @@ BEGIN
     IF @ResultRowCount = 0
     BEGIN
         RAISERROR('No results returned for the provided Dataset Id. Confirm Dataset is enabled, and related Connections are enabled.',16,1)
+		RETURN 0;
     END
 
 
@@ -166,7 +167,10 @@ BEGIN
             ds.DatasetId = @DatasetId
     END
 	ELSE
+	BEGIN
 		RAISERROR('Language Type not supported.',16,1)
+		RETURN 0;
+	END
 
     IF (@LoadType = 'F')
 		BEGIN
@@ -193,6 +197,7 @@ BEGIN
 	ELSE
 		BEGIN
 			RAISERROR('Load type condition not yet supported.',16,1);
+			RETURN 0;
 		END
 
 	IF @SourceLanguageType IN ('T-SQL', 'PSQL', 'SQL')

diff --git a/src/metadata.ingest/ingest/Stored Procedures/GetMergePayload.sql b/src/metadata.ingest/ingest/Stored Procedures/GetMergePayload.sql
@@ -43,6 +43,7 @@ BEGIN
     IF @ResultRowCount = 0
     BEGIN
         RAISERROR('No results returned for the provided Dataset Id. Confirm Dataset is enabled, and related Connections and Attributes are enabled.',16,1)
+        RETURN 0;
     END
 
 
@@ -125,10 +126,15 @@ BEGIN
             DatasetId = @DatasetId
 
     ELSE IF @LoadAction = 'X'
+    BEGIN
         RAISERROR('Erroneous Load Status. Review the ingest LoadStatus value for the dataset in [ingest].[DatasetsLatestVersion]',16,1)
+        RETURN 0;
+    END
     ELSE
+    BEGIN
         RAISERROR('Unexpected Load action. Review the ingest LoadStatus value for the dataset in [ingest].[DatasetsLatestVersion]',16,1)
-
+        RETURN 0;
+    END
     SELECT 
         @DateTimeFolderHierarchy = 'year=' + CAST(FORMAT(@RawLastLoadDate,'yyyy') AS VARCHAR) + '/' + 
         'month=' + CAST(FORMAT(@RawLastLoadDate,'MM') AS VARCHAR) + '/' +