brickstudymasons · drcandacemakedamoore · Nov 1, 2024 · Sep 27, 2024 · Oct 15, 2024 · Oct 19, 2024
diff --git a/.gitignore b/.gitignore
@@ -92,4 +92,5 @@ conda-pkg/meta.yaml
 !tests/sample_synthetic_data/*.csv
 secret_data/*
 secret_data/
-tmp_dcm2bids/*
+tmp_dcm2bids/*
+.Rproj.user
diff --git a/brickstudy.Rproj b/brickstudy.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
diff --git a/notebooks/Concatenate_volbrain_csvs_deeplesion.ipynb b/notebooks/Concatenate_volbrain_csvs_deeplesion.ipynb
@@ -30,7 +30,8 @@
     "import zipfile\n",
     "import os\n",
     "from zipfile import ZipFile\n",
-    "import glob"
+    "import glob\n",
+    "from io import BytesIO"
    ]
   },
   {
@@ -41,15 +42,19 @@
    },
    "outputs": [],
    "source": [
-    "#navigate to zipfolders and intended folder for output\n",
-    "path_volbrain = 'Z:/VolBrain'\n",
-    "output_folder = \"Z:/VolBrain/Separate_CSV_Deeplesion\"\n"
+    "# Paths to the zip folders and the intended output folder (on Windows, use the commented-out Z:/ paths instead)\n",
+    "# path_volbrain = 'Z:/processed_data/VolBrain'\n",
+    "# output_folder = \"Z:/processed_data/VolBrain/Separate_CSV_Deeplesion\"\n",
+    "path_volbrain = '/mnt/data/processed_data/VolBrain'\n",
+    "output_folder = \"/mnt/data/processed_data/VolBrain/Separate_CSV_Deeplesion\""
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "#create a list to store the CSV headers in\n",
@@ -65,8 +70,6 @@
    "outputs": [],
    "source": [
     "dataframes = []\n",
-    "from io import BytesIO\n",
-    "\n",
     "\n",
     "for zip_file in glob.glob(os.path.join(path_volbrain, '*.zip')):\n",
     "    # split by underscores\n",
@@ -79,7 +82,7 @@
     "            if entry_name.endswith(\".csv\"):\n",
     "                entry = zf.read(entry_name)\n",
     "                df = pd.read_csv(BytesIO(entry), sep=';')\n",
-    "                df['Paricipant Id'] = participant_id\n",
+    "                df['Participant Id'] = participant_id\n",
     "                dataframes.append(df)"
    ]
   },
@@ -103,7 +106,8 @@
    },
    "outputs": [],
    "source": [
-    "concat_df.to_csv('../secret_data/volbrains.csv')"
+    "# Move the \"Participant Id\" column to the front; this is important for the Castor upload. It is selected by name rather than by position, so the cell stays correct when re-run or when the concatenated CSVs have differing headers.\n",
+    "concat_df = concat_df[[\"Participant Id\"] + [col for col in concat_df.columns if col != \"Participant Id\"]]\n"
    ]
   },
   {
@@ -113,14 +117,58 @@
     "tags": []
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "concat_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "#Now make the column names castor appropriate. Do not change the \"Participant Id\" column of course. dl stands for DeepLesion algorithm\n",
+    "concat_df.columns = [\n",
+    "    col if col == \"Participant Id\" else \"dl_\" + col.replace(\" \", \"_\") + \"_T0\"\n",
+    "    for col in concat_df.columns\n",
+    "]\n",
+    "\n",
+    "#For castor, male is 1 and female is 2. Replace these values\n",
+    "concat_df[\"dl_Sex_T0\"] = concat_df[\"dl_Sex_T0\"].replace({\"Female\": 2, \"Male\": 1})\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "concat_df"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# concat_df.to_csv('Z:/castor_proof_files/volbrains_castor.csv')\n",
+    "# (on Windows, use the commented-out Z:/ path above instead)\n",
+    "concat_df.to_csv('/mnt/data/castor_proof_files/volbrains_castor.csv')"
+   ]
   }
  ],
  "metadata": {
@@ -139,7 +187,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.8"
+   "version": "3.11.9"
   }
  },
  "nbformat": 4,

diff --git a/notebooks/experi/.~WISC_convertR.ipynb b/notebooks/experi/.~WISC_convertR.ipynb
@@ -0,0 +1,204 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Laad de benodigde libraries\n",
+    "library(readxl)             # Load the package into your R session\n",
+    "library(writexl)\n",
+    "library(dplyr)\n",
+    " # Import the CSV file exported from Gemstracker for the WISC-V\n",
+    " # Replace the path below with the actual file name and path if the export is located in a different directory\n",
+    " WISCV_gemstracker <- read.csv(\"../../secret_data/WISC_V_BRICK_T0dd08042024.csv\", sep=\";\")\n",
+    " \n",
+    " #show column names of the new df\n",
+    " print(colnames(WISCV_gemstracker))\n",
+    "\n",
+    " #verander de column names van gemstracker naar castor, voor Baseline.\n",
+    "\n",
+    "#head(WISCV_gemstracker)\n",
+    "\n",
+    " # Change column names\n",
+    " colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"gr2o_patient_nr\"] <- \"Participant Id\"\n",
+    " colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"DatumWISCV\"] <- \"Datum_WISC_V\"\n",
+    " colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"StartWISCV\"] <- \"Start_WISC_V\"\n",
+    " colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"StopWISCV\"] <- \"Stop_WISC_V\"\n",
+    " colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"WISCVVolt\"] <- \"WISC_V_voltooid\"\n",
+    " colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"VolgordeWISC\"] <- \"Volgorde_NPO_3\"\n",
+    " colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"AfnemerWISCV\"] <- \"Afnemer_WISC_V\"\n",
+    " colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"WISCVOpm\"] <- \"Opmerkingen_WISC_V\"\n",
+    " colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"WISCVOpmUit\"] <- \"Uitleg_Opmerkingen_WISC_V\"\n",
+    " \n",
+    " \n",
+    "\n",
+    "#head(WISCV_gemstracker)\n",
+    "\n",
+    " #voeg extra kolom toe om alleen de verplichte BRICK-waarden te exporteren naar castor en niet de hele WISCV. Als je toch alle velden wil invullen, kan\n",
+    " #je of alle velden een \"o\" geven, of dit handmatig wijzigen per participant, in de excel die op het einde gegenereerd wordt\n",
+    "\n",
+    " WISCV_gemstracker$BRICK_of_uitgebreid <- 1\n",
+    " \n",
+    " #De volgende kolommen die wel in de Castor-export van de WISCV staan, voegen we niet toe: Participant status, site abbreviation en participation creation date. \n",
+    " #Dit zal niet zorgen voor problemen, zolang de participanten al vóór de Gemstracker-import zijn aangemaakt in Castor. We gaan niet via deze weg nieuwe patienten importeren. Dit kan wel, maar dan heb je wel deze kolommen nodig.\n",
+    "\n",
+    " #Waarden in de kolommen aanpassen op Castor Format\n",
+    " \n",
+    " #1.Te beginnen met de datum:\n",
+    " \n",
+    " # Convert the column \"Datum_WISC_V\" to Date format\n",
+    " WISCV_gemstracker$Datum_WISC_V <- as.Date(WISCV_gemstracker$Datum_WISC_V, format = \"%Y-%m-%d\")\n",
+    " \n",
+    " # Change the date format to \"02-08-2023\" in the same column\n",
+    " WISCV_gemstracker$Datum_WISC_V <- format(WISCV_gemstracker$Datum_WISC_V, \"%d-%m-%Y\")\n",
+    " \n",
+    " \n",
+    "\n",
+    "#head(WISCV_gemstracker)\n",
+    "\n",
+    " #2.Hierbij veranderen we de afnemer meerkeuze kolom naar een kolom met 1 waarde in de Castor dataframe (\"Afnemer_WISC_V_W\")\n",
+    " # Let op! Dit moet aangepast worden als er meer afnemers bij komen, maar dan verandert de 6 in een 7 etc. Goed opletten als de labelsets worden aangepast in Castor en LS!\n",
+    " \n",
+    " # Collapse the multiple-choice columns \"AfnemerWISCV_SQ000\" to \"AfnemerWISCV_SQ006\" into one numeric value:\n",
+    " # for each row, Afnemer_WISC_V gets the index i of the column that contains \"Y\" (the highest i wins if several do)\n",
+    " for (i in 0:6) {\n",
+    "   column_name <- paste(\"AfnemerWISCV_SQ00\", i, sep = \"\")\n",
+    "   WISCV_gemstracker$Afnemer_WISC_V[WISCV_gemstracker[column_name] == \"Y\"] <- i\n",
+    " }\n",
+    "\n",
+    "#sapply(WISCV_gemstracker, class)\n",
+    "\n",
+    "#head(WISCV_gemstracker)\n",
+    "\n",
+    " \n",
+    " #3. Hier veranderen we de waarden in de volgorde_NPO kolom voor in Castor\n",
+    " \n",
+    " # Collapse the columns \"VolgordeWISC_SQ001\" to \"VolgordeWISC_SQ004\" into one numeric value in Volgorde_NPO_3 (the index of the column that contains \"Y\")\n",
+    " for (i in 1:4) {\n",
+    "   col_name <- paste(\"VolgordeWISC_SQ00\", i, sep = \"\")\n",
+    "   WISCV_gemstracker$Volgorde_NPO_3[WISCV_gemstracker[, col_name] == \"Y\"] <- i\n",
+    " }\n",
+    " \n",
+    "\n",
+    " #4. Alle waarden onder kolom: Opmerkingen_WISC_V en WISC_V_voltooid gaan van Y naar 1 en van N naar 0\n",
+    " \n",
+    " # Convert \"Y\" to 1 and \"N\" to 0 in the \"Opmerkingen_WISC_V\" column\n",
+    " WISCV_gemstracker$Opmerkingen_WISC_V[WISCV_gemstracker$Opmerkingen_WISC_V == \"Y\"] <- 1\n",
+    " WISCV_gemstracker$Opmerkingen_WISC_V[WISCV_gemstracker$Opmerkingen_WISC_V == \"N\"] <- 0\n",
+    " \n",
+    "\n",
+    " WISCV_gemstracker$WISC_V_voltooid[WISCV_gemstracker$WISC_V_voltooid == \"Y\"] <- 1\n",
+    " WISCV_gemstracker$WISC_V_voltooid[WISCV_gemstracker$WISC_V_voltooid == \"N\"] <- 0\n",
+    " \n",
+    "\n",
+    " ## Nog te testen: Haal het uur en de minuten uit twee afzonderlijke kolommen en zet ze samen in de start kolom.\n",
+    " # Convert numeric columns to characters\n",
+    " WISCV_gemstracker$StartWISCV_SQ001 <- as.character(WISCV_gemstracker$StartWISCV_SQ001)\n",
+    " WISCV_gemstracker$StartWISCV_SQ002 <- as.character(WISCV_gemstracker$StartWISCV_SQ002)\n",
+    " \n",
+    "\n",
+    " # Create Start_WISC_V column\n",
+    " WISCV_gemstracker$Start_WISC_V <- paste(WISCV_gemstracker$StartWISCV_SQ001, WISCV_gemstracker$StartWISCV_SQ002, sep = \":\")\n",
+    " \n",
+    " # Repeat the same process for Stop columns\n",
+    " WISCV_gemstracker$StopWISCV_SQ001 <- as.character(WISCV_gemstracker$StopWISCV_SQ001)\n",
+    " WISCV_gemstracker$StopWISCV_SQ002 <- as.character(WISCV_gemstracker$StopWISCV_SQ002)\n",
+    " \n",
+    "\n",
+    " # Create Stop_WISC_V column\n",
+    " WISCV_gemstracker$Stop_WISC_V <- paste(WISCV_gemstracker$StopWISCV_SQ001, WISCV_gemstracker$StopWISCV_SQ002, sep = \":\")\n",
+    "\n",
+    " #Volgorde kolommen aanpassen\n",
+    " WISCV_gemstracker <- WISCV_gemstracker %>%\n",
+    "   select(\"Participant Id\", \"BRICK_of_uitgebreid\", \"Datum_WISC_V\", \"Start_WISC_V\", \n",
+    "          \"Stop_WISC_V\", \"Volgorde_NPO_3\", \"WISC_V_voltooid\", \n",
+    "          \"Opmerkingen_WISC_V\", \"Uitleg_Opmerkingen_WISC_V\", everything())\n",
+    " \n",
+    " \n",
+    "\n",
+    "#length(colnames(WISCV_gemstracker))\n",
+    "\n",
+    " #Elke field name is uniek. In de baseline meting, hebben bijna alle velden in Castor een _1. In FU1 en FU2 zal dit zeker _2 en _3 worden\n",
+    " #behalve participant id en \"Volgorde_NPO_3\", moeten alle kolommen eraan geloven.\n",
+    "\n",
+    " # # Create a list to hold the new column names\n",
+    " # new_column_names <- vector(\"character\", length(names(WISCV_gemstracker)))\n",
+    " \n",
+    " #Delete de kolommen uit de gemstracker export die je niet nodig hebt (dplyr)\n",
+    "\n",
+    " # List of columns to be removed\n",
+    " columns_to_remove <- c(\"respondentid\", \"organizationid\", \"gto_id_relation\", \"forgroup\", \n",
+    "                        \"consentcode\", \"resptrackid\", \"gto_round_order\", \"gto_round_description\", \n",
+    "                        \"gtr_track_name\", \"gr2t_track_info\", \"gto_completion_time\", \"gto_start_time\", \n",
+    "                        \"gto_valid_from\", \"gto_valid_until\", \"startlanguage\", \"lastpage\", \n",
+    "                        \"gto_id_token\", \"surveyversion\", \"AfnemerWISCV_SQ000\", \"AfnemerWISCV_SQ001\", \n",
+    "                        \"AfnemerWISCV_SQ002\", \"AfnemerWISCV_SQ003\", \"AfnemerWISCV_SQ004\", \n",
+    "                        \"AfnemerWISCV_SQ005\", \"AfnemerWISCV_SQ006\", \n",
+    "                        \"VolgordeWISC_SQ001\", \"VolgordeWISC_SQ002\", \"VolgordeWISC_SQ003\", \n",
+    "                        \"VolgordeWISC_SQ004\", \"Sub\", \"StartWISCV_SQ001\", \"StartWISCV_SQ002\", \"StopWISCV_SQ001\", \"StopWISCV_SQ002\")\n",
+    " \n",
+    " # Remove the listed columns; any_of() (which supersedes one_of()) silently skips names that are absent from the export\n",
+    " WISCV_gemstracker <- WISCV_gemstracker %>%\n",
+    "   select(-any_of(columns_to_remove))\n",
+    "\n",
+    "# Create a list to hold the new column names\n",
+    "new_column_names <- vector(\"character\", length(names(WISCV_gemstracker)))\n",
+    " \n",
+    " \n",
+    " # Iterate through each column name\n",
+    " for (i in seq_along(names(WISCV_gemstracker))) {\n",
+    "   if (names(WISCV_gemstracker)[i] != \"Participant Id\" && names(WISCV_gemstracker)[i] != \"Volgorde_NPO_3\") {\n",
+    "     new_column_names[i] <- paste(names(WISCV_gemstracker)[i], \"_1\", sep = \"\")\n",
+    "   } else {\n",
+    "     new_column_names[i] <- names(WISCV_gemstracker)[i]\n",
+    "   }\n",
+    " }\n",
+    " \n",
+    " # Assign the new column names to the dataframe\n",
+    " names(WISCV_gemstracker) <- new_column_names\n",
+    " \n",
+    " \n",
+    " \n",
+    " # Print the updated column names\n",
+    " print(names(WISCV_gemstracker))\n",
+    " \n",
+    " #exporteer nieuwe df naar excel file\n",
+    " write_xlsx(WISCV_gemstracker, path = \"WISCV_gemstracker_poging_makeda.xlsx\")\n",
+    " \n",
+    " # Export to CSV\n",
+    " write.csv(WISCV_gemstracker, file = \"WISCV_gemstracker_poging_makeda_csv.csv\", row.names = FALSE)\n",
+    " \n",
+    " \n",
+    " "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "R",
+   "language": "R",
+   "name": "ir"
+  },
+  "language_info": {
+   "codemirror_mode": "r",
+   "file_extension": ".r",
+   "mimetype": "text/x-r-source",
+   "name": "R",
+   "pygments_lexer": "r",
+   "version": "4.1.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}