nv-morpheus · rapids-bot · Apr 20, 2023 · Apr 18, 2023 · Apr 19, 2023 · Apr 19, 2023
@@ -1642,8 +1642,12 @@ def get_anomaly_score_losses(self, df):
                     loss = self.cce(cat[i], codes[i])
                     # Convert to 2 dimensions
                     cce_loss_slice_of_each_feat.append(loss.data.reshape(-1, 1))
-                # merge the tensors into one (n_records * n_features) tensor
-                cce_loss_slice = torch.cat(cce_loss_slice_of_each_feat, dim=1)
+
+                if cce_loss_slice_of_each_feat:
+                    # merge the tensors into one (n_records * n_features) tensor
+                    cce_loss_slice = torch.cat(cce_loss_slice_of_each_feat, dim=1)
+                else:
+                    cce_loss_slice = torch.empty((len(df_slice), 0))
 
                 mse_loss_slices.append(mse_loss_slice)
                 bce_loss_slices.append(bce_loss_slice)

@@ -289,6 +289,52 @@ def test_auto_encoder_get_anomaly_score(train_ae: autoencoder.AutoEncoder, train
     assert round(anomaly_score.std().item(), 2) == 0.11
 
 
+def test_auto_encoder_get_anomaly_score_losses(train_ae: autoencoder.AutoEncoder):
+    # number of rows in the dummy DataFrame; also the expected first dimension of every loss tensor
+    row_cnt = 10
+    # create a dummy DataFrame with 3 numerical, 2 boolean and 1 categorical feature
+    data = {
+        'num_1': [i for i in range(row_cnt)],
+        'num_2': [i / 2 for i in range(row_cnt)],
+        'num_3': [i / 2 for i in range(row_cnt)],
+        'bool_1': [i % 2 == 0 for i in range(row_cnt)],
+        'bool_2': [i % 3 == 0 for i in range(row_cnt)],
+        'cat_1': [f'str_{i}' for i in range(row_cnt)]
+    }
+    df = pd.DataFrame(data)
+
+    train_ae._build_model(df)
+
+    # compute the per-feature anomaly score losses for the dummy DataFrame
+    mse_loss, bce_loss, cce_loss = train_ae.get_anomaly_score_losses(df)
+
+    # each loss tensor is expected to have one row per record and one column per feature of its kind
+    assert mse_loss.shape == torch.Size([row_cnt, 3]), "mse_loss has incorrect shape"
+    assert bce_loss.shape == torch.Size([row_cnt, 2]), "bce_loss has incorrect shape"
+    assert cce_loss.shape == torch.Size([row_cnt, 1]), "cce_loss has incorrect shape"
+
+
+def test_auto_encoder_get_anomaly_score_losses_no_cat_feats(train_ae: autoencoder.AutoEncoder):
+    # dummy DataFrame of row_cnt rows with 1 numerical and 2 boolean features only (no categorical columns)
+    row_cnt = 10
+    data = {
+        'num_1': [i for i in range(row_cnt)],
+        'bool_1': [i % 2 == 0 for i in range(row_cnt)],
+        'bool_2': [i % 3 == 0 for i in range(row_cnt)]
+    }
+    df = pd.DataFrame(data)
+
+    train_ae._build_model(df)
+
+    # compute the per-feature anomaly score losses for the dummy DataFrame
+    mse_loss, bce_loss, cce_loss = train_ae.get_anomaly_score_losses(df)
+
+    # with no categorical features, cce_loss is expected to be an empty (row_cnt, 0) tensor
+    assert mse_loss.shape == torch.Size([row_cnt, 1]), "mse_loss has incorrect shape"
+    assert bce_loss.shape == torch.Size([row_cnt, 2]), "bce_loss has incorrect shape"
+    assert cce_loss.shape == torch.Size([row_cnt, 0]), "cce_loss has incorrect shape"
+
+
 def test_auto_encoder_prepare_df(train_ae: autoencoder.AutoEncoder, train_df: pd.DataFrame):
     train_ae.fit(train_df, epochs=1)