Skip to content

Commit

Permalink
Merge pull request #89 from CloudFormations/develop
Browse files Browse the repository at this point in the history
Add Development changes
  • Loading branch information
MattPCollins authored Aug 12, 2024
2 parents 8996208 + 6f3c9a6 commit b8aa841
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 65 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,26 +29,6 @@

# COMMAND ----------

# payload = {
# "CuratedStorageAccessKey": "cumulusframeworkdevcuratedaccesskey",
# "CuratedStorageName": "cumulusframeworkdev",
# "CuratedContainerName": "curated",
# "CleansedStorageAccessKey": "cumulusframeworkdevcleansedaccesskey",
# "CleansedStorageName": "cumulusframeworkdev",
# "CleansedContainerName": "cleansed",
# "SchemaName": "Dimensions",
# "DatasetName": "GoldTable1",
# "ColumnsList": "AddressId,FullAddress",
# "ColumnTypeList": "INTEGER,STRING",
# "BkAttributesList": "AddressId",
# "PartitionByAttributesList": "",
# "SurrogateKey": "GoldTable1Id",
# "LoadType": "F",
# "BusinessLogicNotebookPath": "./businesslogicnotebooks/BespokeNotebook",
# }

# COMMAND ----------

cleansedSecret, cleansedStorageName, cleansedContainerName, curatedSecret, curatedStorageName, curatedContainerName, curatedSchemaName, curatedDatasetName, columnsList, columnTypeList, bkList, partitionList, surrogateKey, loadType, businessLogicNotebookPath = getTransformPayloadVariables(payload)

# COMMAND ----------
Expand Down
21 changes: 0 additions & 21 deletions src/azure.databricks/python/notebooks/transform/CreateFactTable.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,27 +27,6 @@
import json
payload = json.loads(dbutils.widgets.get("Notebook Payload"))

# COMMAND ----------

# payload = {
# "CuratedStorageAccessKey": "cumulusframeworkdevcuratedaccesskey",
# "CuratedStorageName": "cumulusframeworkdev",
# "CuratedContainerName": "curated",
# "CleansedStorageAccessKey": "cumulusframeworkdevcleansedaccesskey",
# "CleansedStorageName": "cumulusframeworkdev",
# "CleansedContainerName": "cleansed",
# "SchemaName": "Facts",
# "DatasetName": "GoldAgg1",
# "ColumnsList": "AddressId,FullAddress",
# "ColumnTypeList": "INTEGER,STRING",
# "CuratedPkList": "AddressId",
# "CuratedPkListPartitionFields":"",
# "SurrogateKey": "GoldAgg1Id",
# "LoadType": "F",
# "BusinessLogicNotebookPath": "./businesslogicnotebooks/BespokeNotebook",
# }


# COMMAND ----------

cleansedSecret, cleansedStorageName, cleansedContainerName, curatedSecret, curatedStorageName, curatedContainerName, curatedSchemaName, curatedDatasetName, columnsList, columnTypeList, bkList, partitionList, surrogateKey, loadType, businessLogicNotebookPath = getTransformPayloadVariables(payload)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,29 +71,6 @@

# COMMAND ----------

# DataFrame Validation:
# Check Columns in metadata match those in DataFrame
# Check DF result is non-zero before overwriting
# ADVISORY: Check for unadvised column types (e.g. STRING)
# ADVISORY: Check aggregations exist
# ADVISORY: Check for partitionby fields being used if data size is expected < 1TB

# COMMAND ----------

# # check schemas match
# if sourceDf.schema == targetDf.schema:
# print("Target vs Source Schema Validation Successful")
# elif sourceDf.schema != targetDf.schema:
# raise Exception("Target vs Source Schema Validation Unsuccessful")
# elif type(sourceDf) != "pyspark.sql.dataframe.DataFrame":
# raise TypeError("Error in Source DataFrame provided.")
# elif type(targetDf) != "pyspark.sql.dataframe.DataFrame":
# raise TypeError("Error in Target DataFrame provided.")
# else:
# raise Exception("Unexpected state. Please review.")

# COMMAND ----------

output = {}

# COMMAND ----------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@ WHERE StageName = @StageName
-- Guard: exactly one stage row is expected. When @StageCount is 0, raise a
-- severity-16 error and leave via RETURN so that later statements do not run.
-- NOTE(review): a return code of 0 conventionally signals success; callers
-- presumably rely on the raised error rather than the return value - confirm.
IF @StageCount = 0
BEGIN
RAISERROR('No rows returned. Please review the Stage Details provided and confirm this is enabled.',16,1)
RETURN 0;
END
-- Guard: more than one matching stage row is also treated as an error.
IF @StageCount > 1
BEGIN
RAISERROR('Multiple rows returned. Please review the Stage Details provided.',16,1)
RETURN 0;
END


Expand All @@ -42,10 +44,12 @@ AND ds.DatasetDisplayName = @DatasetDisplayName
-- Guard: exactly one dataset row is expected. When @DatasetCount is 0, raise a
-- severity-16 error and leave via RETURN so that later statements do not run.
-- NOTE(review): a return code of 0 conventionally signals success; callers
-- presumably rely on the raised error rather than the return value - confirm.
IF @DatasetCount = 0
BEGIN
RAISERROR('No rows returned. Please review the Dataset Id provided and confirm this is enabled.',16,1)
RETURN 0;
END
-- Guard: more than one matching dataset row is also treated as an error.
IF @DatasetCount > 1
BEGIN
RAISERROR('More than 1 row returned. Please review there is 1 active Dataset for the provided Dataset Id, and the connection details.',16,1)
RETURN 0;
END

-- Store all dataset ids associated with DatasetDisplayName
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ AND ds.DatasetDisplayName = @DatasetDisplayName
-- Guard: exactly one dataset row is expected. When @DatasetCount is 0, raise a
-- severity-16 error and leave via RETURN so that later statements do not run.
-- NOTE(review): a return code of 0 conventionally signals success; callers
-- presumably rely on the raised error rather than the return value - confirm.
IF @DatasetCount = 0
BEGIN
RAISERROR('No rows returned. Please review the Dataset Id provided and confirm this is enabled.',16,1)
RETURN 0;
END
-- Guard: more than one matching dataset row is also treated as an error.
IF @DatasetCount > 1
BEGIN
RAISERROR('More than 1 row returned. Please review there is 1 active Dataset for the provided Dataset Id, and the connection details.',16,1)
RETURN 0;
END

SELECT
Expand Down Expand Up @@ -71,14 +73,17 @@ SELECT @PipelineIdResult, @DependantPipelineIdResult
IF @PipelineIdResult IS NULL AND @DependantPipelineIdResult IS NULL
BEGIN
RAISERROR('Missing Ids for this Dataset',16,1)
RETURN 0;
END
ELSE IF @PipelineIdResult IS NULL AND @DependantPipelineIdResult IS NOT NULL
BEGIN
RAISERROR('Missing PipelineId (Raw Ingest Pipeline)',16,1)
RETURN 0;
END
ELSE IF @PipelineIdResult IS NOT NULL AND @DependantPipelineIdResult IS NULL
BEGIN
RAISERROR('Missing DependantPipelineId (Cleansed Merge Pipeline)',16,1)
RETURN 0;
END
ELSE IF @PipelineIdResult IS NOT NULL AND @DependantPipelineIdResult IS NOT NULL
BEGIN
Expand All @@ -93,5 +98,6 @@ END
ELSE
BEGIN
RAISERROR('Unexpected Error',16,1)
RETURN 0;
END

Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ BEGIN
-- Guard: when @ResultRowCount is 0, raise a severity-16 error and leave via
-- RETURN so that the statements below do not run.
-- NOTE(review): a return code of 0 conventionally signals success; callers
-- presumably rely on the raised error rather than the return value - confirm.
IF @ResultRowCount = 0
BEGIN
RAISERROR('No results returned for the provided Dataset Id. Confirm Dataset is enabled, and related Connections are enabled.',16,1)
RETURN 0;
END


Expand Down Expand Up @@ -166,7 +167,10 @@ BEGIN
ds.DatasetId = @DatasetId
END
ELSE
BEGIN
RAISERROR('Language Type not supported.',16,1)
RETURN 0;
END

IF (@LoadType = 'F')
BEGIN
Expand All @@ -193,6 +197,7 @@ BEGIN
ELSE
BEGIN
RAISERROR('Load type condition not yet supported.',16,1);
RETURN 0;
END

IF @SourceLanguageType IN ('T-SQL', 'PSQL', 'SQL')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ BEGIN
-- Guard: when @ResultRowCount is 0, raise a severity-16 error and leave via
-- RETURN so that the statements below do not run.
-- NOTE(review): a return code of 0 conventionally signals success; callers
-- presumably rely on the raised error rather than the return value - confirm.
IF @ResultRowCount = 0
BEGIN
RAISERROR('No results returned for the provided Dataset Id. Confirm Dataset is enabled, and related Connections and Attributes are enabled.',16,1)
RETURN 0;
END


Expand Down Expand Up @@ -125,10 +126,15 @@ BEGIN
DatasetId = @DatasetId

ELSE IF @LoadAction = 'X'
BEGIN
RAISERROR('Erroneous Load Status. Review the ingest LoadStatus value for the dataset in [ingest].[DatasetsLatestVersion]',16,1)
RETURN 0;
END
ELSE
BEGIN
RAISERROR('Unexpected Load action. Review the ingest LoadStatus value for the dataset in [ingest].[DatasetsLatestVersion]',16,1)

RETURN 0;
END
SELECT
@DateTimeFolderHierarchy = 'year=' + CAST(FORMAT(@RawLastLoadDate,'yyyy') AS VARCHAR) + '/' +
'month=' + CAST(FORMAT(@RawLastLoadDate,'MM') AS VARCHAR) + '/' +
Expand Down

0 comments on commit b8aa841

Please sign in to comment.