diff --git a/.gitignore b/.gitignore index d4a4324..0eba20f 100644 --- a/.gitignore +++ b/.gitignore @@ -92,4 +92,5 @@ conda-pkg/meta.yaml !tests/sample_synthetic_data/*.csv secret_data/* secret_data/ -tmp_dcm2bids/* \ No newline at end of file +tmp_dcm2bids/* +.Rproj.user diff --git a/brickstudy.Rproj b/brickstudy.Rproj new file mode 100644 index 0000000..8e3c2eb --- /dev/null +++ b/brickstudy.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX diff --git a/notebooks/Concatenate_volbrain_csvs_deeplesion.ipynb b/notebooks/Concatenate_volbrain_csvs_deeplesion.ipynb index c48c4f6..974d6e4 100644 --- a/notebooks/Concatenate_volbrain_csvs_deeplesion.ipynb +++ b/notebooks/Concatenate_volbrain_csvs_deeplesion.ipynb @@ -30,7 +30,8 @@ "import zipfile\n", "import os\n", "from zipfile import ZipFile\n", - "import glob" + "import glob\n", + "from io import BytesIO" ] }, { @@ -41,15 +42,19 @@ }, "outputs": [], "source": [ - "#navigate to zipfolders and intended folder for output\n", - "path_volbrain = 'Z:/VolBrain'\n", - "output_folder = \"Z:/VolBrain/Separate_CSV_Deeplesion\"\n" + "#navigate to zipfolders and intended folder for output (change to commented out if on Windows)\n", + "# path_volbrain = 'Z:/processed_data/VolBrain'\n", + "# output_folder = \"Z:/processed_data/VolBrain/Separate_CSV_Deeplesion\"\n", + "path_volbrain = '/mnt/data/processed_data/VolBrain'\n", + "output_folder = \"/mnt/data/processed_data/VolBrain/Separate_CSV_Deeplesion\"" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "#create a list to store the CSV headers in\n", @@ -65,8 +70,6 @@ "outputs": [], "source": [ "dataframes = []\n", - "from io import BytesIO\n", - "\n", "\n", "for zip_file in glob.glob(os.path.join(path_volbrain, '*.zip')):\n", " # split by underscores\n", @@ -79,7 +82,7 @@ " if entry_name.endswith(\".csv\"):\n", " entry = zf.read(entry_name)\n", " df = pd.read_csv(BytesIO(entry), sep=';')\n", - " df['Paricipant Id'] = participant_id\n", + " df['Participant Id'] = participant_id\n", " dataframes.append(df)" ] }, @@ -103,7 +106,8 @@ }, "outputs": [], "source": [ - "concat_df.to_csv('../secret_data/volbrains.csv')" + "#move the participant id row to the front. Now it's last. This is important for upload in castor.\n", + "concat_df = concat_df.loc[:, [concat_df.columns[-1]] + list(concat_df.columns[:-1])]\n" ] }, { @@ -113,7 +117,38 @@ "tags": [] }, "outputs": [], - "source": [] + "source": [ + "concat_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "#Now make the column names castor appropriate. Do not change the \"Participant Id\" column of course. dl stands for DeepLesion algorithm\n", + "concat_df.columns = [\n", + " col if col == \"Participant Id\" else \"dl_\" + col.replace(\" \", \"_\") + \"_T0\"\n", + " for col in concat_df.columns\n", + "]\n", + "\n", + "#For castor, male is 1 and female is 2. Replace these values\n", + "concat_df[\"dl_Sex_T0\"] = concat_df[\"dl_Sex_T0\"].replace({\"Female\": 2, \"Male\": 1})\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "concat_df" + ] }, { "cell_type": "code", @@ -121,6 +156,19 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# concat_df.to_csv('Z:/castor_proof_files/volbrains_castor.csv')\n", + "#(change to commented out if on Windows)\n", + "concat_df.to_csv('/mnt/data/castor_proof_files/volbrains_castor.csv')" + ] } ], "metadata": { @@ -139,7 +187,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/notebooks/experi/.~WISC_convertR.ipynb b/notebooks/experi/.~WISC_convertR.ipynb new file mode 100644 index 0000000..1e1cbda --- /dev/null +++ b/notebooks/experi/.~WISC_convertR.ipynb @@ -0,0 +1,204 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0", + "metadata": {}, + "outputs": [], + "source": [ + "#Laad de benodigde libraries\n", + "library(readxl) # Load the package into your R session\n", + "library(writexl)\n", + "library(dplyr)\n", + " #Importeer excel file van export gemstracker WISCV\n", + " # Replace \"data.xlsx\" with the actual file name and path if it's located in a different directory\n", + " WISCV_gemstracker <- read.csv(\"../../secret_data/WISC_V_BRICK_T0dd08042024.csv\", sep=\";\")\n", + " \n", + " #show column names of the new df\n", + " print(colnames(WISCV_gemstracker))\n", + "\n", + " #verander de column names van gemstracker naar castor, voor Baseline.\n", + "\n", + "#head(WISCV_gemstracker)\n", + "\n", + " # Change column names\n", + " colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"gr2o_patient_nr\"] <- \"Participant Id\"\n", + " colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"DatumWISCV\"] <- \"Datum_WISC_V\"\n", + " colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"StartWISCV\"] <- \"Start_WISC_V\"\n", + " colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"StopWISCV\"] <- \"Stop_WISC_V\"\n", + " colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"WISCVVolt\"] <- \"WISC_V_voltooid\"\n", + " colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"VolgordeWISC\"] <- \"Volgorde_NPO_3\"\n", + " colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"AfnemerWISCV\"] <- \"Afnemer_WISC_V\"\n", + " colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"WISCVOpm\"] <- \"Opmerkingen_WISC_V\"\n", + " colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"WISCVOpmUit\"] <- \"Uitleg_Opmerkingen_WISC_V\"\n", + " \n", + " \n", + "\n", + "#head(WISCV_gemstracker)\n", + "\n", + " #voeg extra kolom toe om alleen de verplichte BRICK-waarden te exporteren naar castor en niet de hele WISCV. Als je toch alle velden wil invullen, kan\n", + " #je of alle velden een \"o\" geven, of dit handmatig wijzigen per participant, in de excel die op het einde gegenereerd wordt\n", + "\n", + " WISCV_gemstracker$BRICK_of_uitgebreid <- 1\n", + " \n", + " #De volgende kolommen die wel in de Castor-export van de WICV staan, voegen we niet toe: Participant status, site abbreviation en participation creation date. \n", + " #Dit zal niet zorgen voor problemen, zolang de participanten al vóór de Gemstracker-import zijn aangemaakt in Castor. We gaan niet via deze weg nieuwe patienten importeren. Dit kan wel, maar dan heb je wel deze kolommen nodig.\n", + "\n", + " #Waarden in de kolommen aanpassen op Castor Format\n", + " \n", + " #1.Te beginnen met de datum:\n", + " \n", + " # Convert the column \"Datum_WISC_V\" to Date format\n", + " WISCV_gemstracker$Datum_WISC_V <- as.Date(WISCV_gemstracker$Datum_WISC_V, format = \"%Y-%m-%d\")\n", + " \n", + " # Change the date format to \"02-08-2023\" in the same column\n", + " WISCV_gemstracker$Datum_WISC_V <- format(WISCV_gemstracker$Datum_WISC_V, \"%d-%m-%Y\")\n", + " \n", + " \n", + "\n", + "#head(WISCV_gemstracker)\n", + "\n", + " #2.Hierbij veranderen we de afnemer meerkeuze kolom naar een kolom met 1 waarde in de Castor dataframe (\"Afnemer_WISC_V_W\")\n", + " # Let op! Dit moet aangepast worden als er meer afnemers bij komen, maar dan verandert de 6 in een 7 etc. Goed opletten als de labelsets worden aangepast in Castor en LS!\n", + " \n", + " # Convert \"AfnemerWISCV_SQ000\" to \"AfnemerWISCV_SQ006\" columns to numeric values\n", + " # Convert \"AfnemerWISCV_SQ000\" to \"AfnemerWISCV_SQ006\" columns to numeric values\n", + " for (i in 0:6) {\n", + " column_name <- paste(\"AfnemerWISCV_SQ00\", i, sep = \"\")\n", + " WISCV_gemstracker$Afnemer_WISC_V[WISCV_gemstracker[column_name] == \"Y\"] <- i\n", + " }\n", + "\n", + "#sapply(WISCV_gemstracker, class)\n", + "\n", + "#head(WISCV_gemstracker)\n", + "\n", + " \n", + " #3. Hier veranderen we de waarden in de volgorde_NPO kolom voor in Castor\n", + " \n", + " # Convert \"VolgordeWISC_SQ001\" to \"VolgordeWISC_SQ004\" columns to numeric values\n", + " for (i in 1:4) {\n", + " col_name <- paste(\"VolgordeWISC_SQ00\", i, sep = \"\")\n", + " WISCV_gemstracker$Volgorde_NPO_3[WISCV_gemstracker[, col_name] == \"Y\"] <- i\n", + " }\n", + " \n", + "\n", + " #4. Alle waarden onder kolom: Opmerkingen_WISC_V en WISC_V_voltooid gaan van Y naar 1 en van N naar 0\n", + " \n", + " # Convert \"Y\" to 1 and \"N\" to 0 in the \"Opmerkingen_WISC_V\" column\n", + " WISCV_gemstracker$Opmerkingen_WISC_V[WISCV_gemstracker$Opmerkingen_WISC_V == \"Y\"] <- 1\n", + " WISCV_gemstracker$Opmerkingen_WISC_V[WISCV_gemstracker$Opmerkingen_WISC_V == \"N\"] <- 0\n", + " \n", + "\n", + " WISCV_gemstracker$WISC_V_voltooid[WISCV_gemstracker$WISC_V_voltooid == \"Y\"] <- 1\n", + " WISCV_gemstracker$WISC_V_voltooid[WISCV_gemstracker$WISC_V_voltooid == \"N\"] <- 0\n", + " \n", + "\n", + " ## Nog te testen: Haal het uur en de minuten uit twee afzonderlijke kolommen en zet ze samen in de start kolom.\n", + " # Convert numeric columns to characters\n", + " WISCV_gemstracker$StartWISCV_SQ001 <- as.character(WISCV_gemstracker$StartWISCV_SQ001)\n", + " WISCV_gemstracker$StartWISCV_SQ002 <- as.character(WISCV_gemstracker$StartWISCV_SQ002)\n", + " \n", + "\n", + " # Create Start_WISC_V column\n", + " WISCV_gemstracker$Start_WISC_V <- paste(WISCV_gemstracker$StartWISCV_SQ001, WISCV_gemstracker$StartWISCV_SQ002, sep = \":\")\n", + " \n", + " # Repeat the same process for Stop columns\n", + " WISCV_gemstracker$StopWISCV_SQ001 <- as.character(WISCV_gemstracker$StopWISCV_SQ001)\n", + " WISCV_gemstracker$StopWISCV_SQ002 <- as.character(WISCV_gemstracker$StopWISCV_SQ002)\n", + " \n", + "\n", + " # Create Stop_WISC_V column\n", + " WISCV_gemstracker$Stop_WISC_V <- paste(WISCV_gemstracker$StopWISCV_SQ001, WISCV_gemstracker$StopWISCV_SQ002, sep = \":\")\n", + "\n", + " #Volgorde kolommen aanpassen\n", + " WISCV_gemstracker <- WISCV_gemstracker %>%\n", + " select(\"Participant Id\", \"BRICK_of_uitgebreid\", \"Datum_WISC_V\", \"Start_WISC_V\", \n", + " \"Stop_WISC_V\", \"Volgorde_NPO_3\", \"WISC_V_voltooid\", \n", + " \"Opmerkingen_WISC_V\", \"Uitleg_Opmerkingen_WISC_V\", everything())\n", + " \n", + " \n", + "\n", + "#length(colnames(WISCV_gemstracker))\n", + "\n", + " #Elke field name is uniek. In de baseline meting, hebben bijn alle velden in Castor een _1. In FU1 en FU2 zal dit zeker _2 en _3 worden\n", + " #behalve participant id en \"Volgorde_NPO_3\", moeten alle kolommen eraan geloven.\n", + "\n", + " # # Create a list to hold the new column names\n", + " # new_column_names <- vector(\"character\", length(names(WISCV_gemstracker)))\n", + " \n", + " #Delete de kolommen uit de gemstracker export die je niet nodig hebt (dplyr)\n", + "\n", + " # List of columns to be removed\n", + " columns_to_remove <- c(\"respondentid\", \"organizationid\", \"gto_id_relation\", \"forgroup\", \n", + " \"consentcode\", \"resptrackid\", \"gto_round_order\", \"gto_round_description\", \n", + " \"gtr_track_name\", \"gr2t_track_info\", \"gto_completion_time\", \"gto_start_time\", \n", + " \"gto_valid_from\", \"gto_valid_until\", \"startlanguage\", \"lastpage\", \n", + " \"gto_id_token\", \"surveyversion\", \"AfnemerWISCV_SQ000\", \"AfnemerWISCV_SQ001\", \n", + " \"AfnemerWISCV_SQ002\", \"AfnemerWISCV_SQ003\", \"AfnemerWISCV_SQ004\", \n", + " \"AfnemerWISCV_SQ005\", \"AfnemerWISCV_SQ006\", \n", + " \"VolgordeWISC_SQ001\", \"VolgordeWISC_SQ002\", \"VolgordeWISC_SQ003\", \n", + " \"VolgordeWISC_SQ004\", \"Sub\", \"StartWISCV_SQ001\", \"StartWISCV_SQ002\", \"StopWISCV_SQ001\", \"StopWISCV_SQ002\")\n", + " \n", + " # Remove the specified columns\n", + " WISCV_gemstracker <- WISCV_gemstracker %>%\n", + " select(-one_of(columns_to_remove))\n", + "\n", + "# Create a list to hold the new column names\n", + "new_column_names <- vector(\"character\", length(names(WISCV_gemstracker)))\n", + " \n", + " \n", + " # Iterate through each column name\n", + " for (i in seq_along(names(WISCV_gemstracker))) {\n", + " if (names(WISCV_gemstracker)[i] != \"Participant Id\" && names(WISCV_gemstracker)[i] != \"Volgorde_NPO_3\") {\n", + " new_column_names[i] <- paste(names(WISCV_gemstracker)[i], \"_1\", sep = \"\")\n", + " } else {\n", + " new_column_names[i] <- names(WISCV_gemstracker)[i]\n", + " }\n", + " }\n", + " \n", + " # Assign the new column names to the dataframe\n", + " names(WISCV_gemstracker) <- new_column_names\n", + " \n", + " \n", + " \n", + " # Print the updated column names\n", + " print(names(WISCV_gemstracker))\n", + " \n", + " #exporteer nieuwe df naar excel file\n", + " write_xlsx(WISCV_gemstracker, path = \"WISCV_gemstracker_poging_makeda.xlsx\")\n", + " \n", + " # Export to CSV\n", + " write.csv(WISCV_gemstracker, file = \"WISCV_gemstracker_poging_makeda_csv.csv\", row.names = FALSE)\n", + " \n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "4.1.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/experi/.~tbe.ipynb b/notebooks/experi/.~tbe.ipynb new file mode 100644 index 0000000..b59e1e4 --- /dev/null +++ b/notebooks/experi/.~tbe.ipynb @@ -0,0 +1,801 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# Instructions for Freesurfer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os \n", + "import shutil\n", + "import glob" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f4636ad3-773b-4c76-9eb1-2dd6d45877cc", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e72dd35-bd1d-4ba1-9e95-1be3b6b139fd", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7096d2a-302e-4a3f-8eff-044557c6c614", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "stats1 = pd.read_csv('../../secret_data/aseg_stats.txt', sep ='\\t')\n", + "stats1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e59403cb-eb4c-4fb8-b502-0f123545d9da", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "stats2 = pd.read_csv('Z:/rr2/all/aseg_stats.txt', sep ='\\t')\n", + "stats2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8de3524c-0023-40bd-8734-54f9f94ab461", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "total_stats = pd.concat([stats2, stats1])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e8305e6-6578-4a9e-97f2-6ef148f21882", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "#total_stats.to_csv('../../secret_data/brain_volumes_from_freesurfer_no_qc.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3265aaf8-3a7d-49bb-8bc9-671046862f51", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "total_stats.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2aa660f-bcca-45d2-ac11-869e45554171", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "total_stats = total_stats.rename(columns= {'Measure:volume':'Participant ID'})\n", + "total_stats['Participant ID'] = total_stats['Participant ID'].str.replace('/', '')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e58bc675-159d-4d2b-94f9-32ace51d6d4e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "total_stats.head(4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b9a97c11-a970-4a4a-9bb5-024ffe47d540", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "total_stats.to_csv('../../secret_data/brain_volumes_from_freesurfer_no_qc.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f436652-1422-4cee-bfb4-601f3865f392", + "metadata": {}, + "outputs": [], + "source": [ + "#total" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38f32f47-2b98-4b48-8780-6642b63790fd", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "total_stats_numeric = total_stats.drop(['Participant ID'], axis=1)\n", + "total_stats_numeric.corr() \n", + "#.corr()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "393cb751-d05d-46ea-a6d7-ffca593c46dc", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "sns.heatmap(total_stats_numeric.corr(method='spearman', numeric_only=False))" + ] + }, + { + "cell_type": "markdown", + "id": "61cbccff-2a1a-4a85-bbb8-10a74ca493be", + "metadata": {}, + "source": [ + "Let's ask if left and right correlate...or other things." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "320667dc-cd16-47a7-8240-566384ab0a9c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "left_to_right = total_stats_numeric[['Left-Lateral-Ventricle', 'Left-Inf-Lat-Vent',\n", + " 'Left-Cerebellum-White-Matter', 'Left-Cerebellum-Cortex',\n", + " 'Left-Thalamus-Proper', 'Left-Caudate', 'Left-Putamen', 'Left-Pallidum',\n", + " 'Left-Hippocampus',\n", + " 'Left-Amygdala', 'Left-Accumbens-area', 'Left-VentralDC',\n", + " 'Left-vessel', 'Left-choroid-plexus', 'Right-Lateral-Ventricle',\n", + " 'Right-Inf-Lat-Vent', 'Right-Cerebellum-White-Matter',\n", + " 'Right-Cerebellum-Cortex', 'Right-Thalamus-Proper', 'Right-Caudate',\n", + " 'Right-Putamen', 'Right-Pallidum', 'Right-Hippocampus',\n", + " 'Right-Amygdala', 'Right-Accumbens-area', 'Right-VentralDC',\n", + " 'Right-vessel', 'Right-choroid-plexus', \n", + " 'Left-WM-hypointensities',\n", + " 'Right-WM-hypointensities', \n", + " 'Left-non-WM-hypointensities', 'Right-non-WM-hypointensities',]]\n", + "sns.heatmap(left_to_right.corr(method='spearman', numeric_only=False))" + ] + }, + { + "cell_type": "markdown", + "id": "9530e548-e8a9-4274-9f86-71ec558c21e1", + "metadata": {}, + "source": [ + "Yes, left and right correlate. Now let's ask whether wmh increase with age...." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1f18974-8d4a-4479-8080-e7d133d4d607", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "total_stats_numeric.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1e5fe32-2bed-4a9c-a6ee-cf0fd71a16f6", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "total_stats" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39599499-1409-4c24-ad32-d4ddbdac4b61", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "wmh_info = total_stats[[ 'Participant ID','3rd-Ventricle', '4th-Ventricle', 'Brain-Stem', 'Left-Hippocampus',\n", + " 'Left-Amygdala', 'CSF', 'Right-Lateral-Ventricle',\n", + " 'Right-Inf-Lat-Vent', 'Right-Cerebellum-White-Matter',\n", + " 'Right-Cerebellum-Cortex', 'Right-Thalamus-Proper', 'Right-Caudate',\n", + " 'Right-Putamen', 'Right-Hippocampus',\n", + " 'Right-Amygdala',\n", + " 'Right-choroid-plexus',\n", + " 'WM-hypointensities', ]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bdbc8251-d3b5-47b7-a224-f8559076b977", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "\n", + "wmh_info['Participant ID'] = wmh_info['Participant ID'].astype(str)\n", + "wmh_info['Participant ID'] = wmh_info['Participant ID'].str.replace(\"-\", \"_\")\n", + "wmh_info" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7bfb076f-8d68-46c6-a946-87cfdfc09d56", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "for_sheet_names = pd.ExcelFile(\"../../secret_data/BRICK_datums_scans_clean_17072024_versie_2.xlsx\")\n", + "sheet_names = for_sheet_names.sheet_names\n", + "print(sheet_names)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "167618c8-bfae-40a3-bf8e-bbb61e87b7f5", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "demo_kit = '../../secret_data/BRICK_datums_scans_clean_17072024_versie_2.xlsx'\n", + "demography = pd.read_excel('../../secret_data/BRICK_datums_scans_clean_17072024_versie_2.xlsx', sheet_name='Dag van MRI+NPO overzicht ')\n", + "demography['Participant ID'] = demography['BRICK-nummer']\n", + "demography = demography[['Participant ID','Genotype', 'Leeftijd_bij_scan']]\n", + "demography" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6309fd11-74d1-4e35-8e6f-766b7b6d4d60", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "fused_demo_wh = demography.merge(wmh_info, on='Participant ID')\n", + "fused_demo_wh.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e12a3a5c-3975-4561-900a-ad5f0f6de889", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Do wm-hypointensities increase with age or genotype?\n", + "simple_question = fused_demo_wh[['Genotype', 'Leeftijd_bij_scan', 'WM-hypointensities']]\n", + "# recode genotype to number\n", + "simple_question['Genotype_code'] = simple_question['Genotype'].astype('category').cat.codes\n", + "simple_question.to_csv('../../secret_data/forshow1.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea9db2ee-7a5a-47b1-a1ac-433f09ed27af", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "simple_question_matrix= simple_question[['Genotype_code', 'Leeftijd_bij_scan', 'WM-hypointensities']]\n", + "simple_question_matrix.corr(method='spearman').to_csv('../../secret_data/forshow2.csv')\n", + "#simple_question_matrix.corr(method='spearman')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6611e73-f8fe-48d0-967f-85f68ab9ab05", + "metadata": {}, + "outputs": [], + "source": [ + "# OK, BUT DID IT matter for neuropsychological stuff?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4605efa3-89ae-476c-b4a1-d07ce4626913", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4bf8fd0e-b484-45f9-a184-dc7a09ac5609", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "WISC_V_BRICK_T0dd08042024 = pd.read_csv(\"../../secret_data/WISCV_export_gemstracker_BRICK_T0_100424.csv\", sep=\",\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27cf1309-b3d5-4543-8bb9-3dae3b109bcd", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "WISC_V_BRICK_T0dd08042024.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0b5808e-32ca-4970-b0ab-93552ac46b07", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# for f in WISC_V_BRICK_T0dd08042024.columns:\n", + "# print(f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a70ab6e-c4fd-40b5-8d17-88ab9866b783", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "WISC_V_BRICK_T0dd08042024_for_castor = WISC_V_BRICK_T0dd08042024.rename(\n", + " columns={\"gr2o_patient_nr\": \"Participant Id\",\n", + " \"DatumWISCV\": \"Datum_WISC_V\",\n", + " \"StartWISCV\": \"Start_WISC_V\",\n", + " \"StopWISCV\": \"Stop_WISC_V\",\n", + " \"WISCVVolt\": \"WISC_V_voltooid\",\n", + " \"VolgordeWISC\":\"Volgorde_NPO_3\",\n", + " \"AfnemerWISCV\":\"Afnemer_WISC_V\",\n", + " \"WISCVOpm\": \"Opmerkingen_WISC_V\",\n", + " \"WISCVOpmUit\":\"Uitleg_Opmerkingen_WISC_V\",} )\n", + "\n", + "\n", + " # colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"gr2o_patient_nr\"] <- \"Participant Id\"\n", + " # colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"DatumWISCV\"] <- \"Datum_WISC_V\"\n", + " # colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"StartWISCV\"] <- \"Start_WISC_V\"\n", + " # colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"StopWISCV\"] <- \"Stop_WISC_V\"\n", + " # colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"WISCVVolt\"] <- \"WISC_V_voltooid\"\n", + " # colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"VolgordeWISC\"] <- \"Volgorde_NPO_3\"\n", + " # colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"AfnemerWISCV\"] <- \"Afnemer_WISC_V\"\n", + " # colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"WISCVOpm\"] <- \"Opmerkingen_WISC_V\"\n", + " # colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"WISCVOpmUit\"] <- \"Uitleg_Opmerkingen_WISC_V\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8233cfd4-73b7-461f-93fc-5cfbb0cb80a3", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "WISC_V_BRICK_T0dd08042024_for_castor['BRICK_of_uitgebreid'] = 1\n", + "WISC_V_BRICK_T0dd08042024_for_castor.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a43f2147-a876-4cad-99dc-c88d84373bb6", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "len(WISC_V_BRICK_T0dd08042024_for_castor)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72049f6a-a0ec-4f6c-a86d-b8313488760b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "len(WISC_V_BRICK_T0dd08042024_for_castor['Participant Id'].unique())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6487d863-ba5e-46e6-a6a1-259b393bd938", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "scores_from_wisc = WISC_V_BRICK_T0dd08042024_for_castor[[\n", + " 'Participant Id',\n", + " 'WgITot',\n", + " 'VsITot',\n", + " 'KRITot',\n", + " 'AWITot',\n", + " 'NVITot',\n", + " 'AVITot',\n", + " 'CCITot',\n", + " 'Tot',]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "637756c0-a2ec-4cee-a3c1-755f7182f0c6", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "scores_from_wisc = scores_from_wisc.rename(columns={'Participant Id': 'Participant ID'})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef7f3337-4993-4a12-92c3-8f766fc0d938", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "pd.merge?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a31a2d9f-c941-44bc-9e03-f1f4203fd86a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "scores_from_wisc.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ceb02ba6-a9e7-421f-9c50-a8904e1346bc", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "#big_fuse = fused_demo_wh.merge(scores_from_wisc, on='Participant ID')\n", + "big_fuse = pd.merge(fused_demo_wh,scores_from_wisc, left_on='Participant ID', right_on='Participant ID', how='outer')\n", + "big_fuse.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70ddf08d-dc63-4b81-b111-43c28fcdf912", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "big_fuse_matrix = big_fuse[['Genotype','Leeftijd_bij_scan', 'Brain-Stem', 'Right-Hippocampus','Left-Hippocampus', 'Left-Amygdala',\n", + " 'Right-Lateral-Ventricle', 'Right-Inf-Lat-Vent',\n", + " 'Right-Cerebellum-White-Matter', 'Right-Cerebellum-Cortex',\n", + " 'Right-Thalamus-Proper', 'Right-Caudate', 'Right-Putamen',\n", + " 'Right-Amygdala', 'Right-choroid-plexus',\n", + " 'WM-hypointensities', 'WgITot', 'VsITot', 'KRITot', 'AWITot', 'NVITot',\n", + " 'AVITot', 'CCITot', 'Tot']]\n", + "big_fuse_matrix['Genotype_code'] = big_fuse_matrix['Genotype'].astype('category').cat.codes\n", + "big_fuse_matrix = big_fuse_matrix.drop(['Genotype'], axis=1)\n", + "big_fuse_matrix.corr(method='spearman')#.to_csv('../../secret_data/forshow3.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d21e0255-ee48-4a9c-b160-2a1b90b49de0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c8d00b5-b2ef-4b7c-aa3e-26f93bc561c6", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "1", + "metadata": { + "tags": [] + }, + "source": [ + "1. first use python to move files a few at a time." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "64dfc2a1-5dae-4cc3-baa6-ee216e77a725", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d9a8a93c-dea4-4c7a-ae5f-f759549b4c0b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4255f5e6-d2f9-401c-9bcc-54439132aa9a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "324d3b96-c0b6-438f-9323-670616e6069e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "find /mnt/data/output -iwholename '*Freesurfer*/*.nii.gz' -type f" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "shutil.copy?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "glob.glob?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "_files = glob.glob('C:/presentations/**', recursive=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "#_files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "for fileName_relative in glob.glob('C:/experimental', *, recursive=True): ## first get full file name with directores using for loop\n", + "\n", + " print(\"Full file name with directories: \", fileName_relative)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "def find_surf_and_move(src, dst, filestring):\n", + " if not os.path.exists(dst):\n", + " os.mkdir(dst)\n", + " files = glob.glob(os.path.join(src,'**'), recursive=True)\n", + "\n", + " # for path, subdirs, files in os.walk(src):\n", + " # print(files)\n", + " # for name in files:\n", + " # if filestring in name:\n", + " # #if fnmatch(name, pattern):\n", + " # full_name = os.path.join(path, name)\n", + " # #destination_folder = \n", + " # print(name)\n", + "\n", + " #shutil.copytree(real_name, dst)\n", + " \n", + " \n", + " \n", + " \n", + "# files = os.listdir()\n", + " \n", + "# if filestring is in file:\n", + " \n", + "# shutil.copytree(src, dst) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "find_surf_and_move(file_path, 'C:/experimental', 'cvasl')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "root = file_path\n", + "#pattern = \"*.\"\n", + "\n", + "for path, subdirs, files in os.walk(root):\n", + " for name in files:\n", + " if 'cvasl' in name:\n", + " #if fnmatch(name, pattern):\n", + " print(os.path.join(path, name))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/r_scripts/Script_regions_most_burden_WMH_Volbrain.R b/r_scripts/Script_regions_most_burden_WMH_Volbrain.R new file mode 100644 index 0000000..afb5231 --- /dev/null +++ b/r_scripts/Script_regions_most_burden_WMH_Volbrain.R @@ -0,0 +1,64 @@ +## Experiment with Volbrain data. At this point, we do not have analysed all of the T0 scans + +library(dplyr) +library(tidyr) +voldf <- read.csv("Z:/Aida_experiment/volbrainsnew.csv") + +# Define participants to exclude +excluded_participants <- c("BRICK_003", "BRICK_009", "BRICK_024", "BRICK_025", "BRICK_040") + +#Turn Paricipant.Id into Participant.Id +voldf <- voldf %>% + rename( Participant.Id = Paricipant.Id) + + +# Filter voldf based on your criteria +voldf_clean <- voldf %>% + filter(Quality.control.T1 != "C", # Exclude rows where Quality.control.T1 is "C" + Quality.control.FLAIR != "C", # Exclude rows where Quality.control.FLAIR is "C" + !(Participant.Id %in% excluded_participants)) # Exclude specific participants + +# View the cleaned DataFrame +head(voldf_clean) + + +#We would like to know where the lesion burden is highest in the patients +# Calculate average lesion count and volume for each region using voldf_clean +lesion_avg_summary <- data.frame( + Region = c("Periventricular", "Deep white", "Juxtacortical", "Infratentorial", "Cerebellar", "Medular"), + + Avg_Lesion_Count = c( + mean(voldf_clean$Periventricular.lesion.count, na.rm = TRUE), + mean(voldf_clean$Deep.white.lesion.count, na.rm = TRUE), + mean(voldf_clean$Juxtacortical.lesion.count, na.rm = TRUE), + mean(voldf_clean$Infratentorial.lesion.count, na.rm = TRUE), + mean(voldf_clean$Cerebellar.lesion.count, na.rm = TRUE), + mean(voldf_clean$Medular.lesion.count, na.rm = TRUE) + ), + + Avg_Lesion_Volume_Absolute = c( + mean(voldf_clean$Periventricular.lesion.volume..absolute..cm3, na.rm = TRUE), + mean(voldf_clean$Deep.white.lesion.volume..absolute..cm3, na.rm = TRUE), + mean(voldf_clean$Juxtacortical.lesion.volume..absolute..cm3, na.rm = TRUE), + mean(voldf_clean$Infratentorial.lesion.volume..absolute..cm3, na.rm = TRUE), + mean(voldf_clean$Cerebellar.lesion.volume..absolute..cm3, na.rm = TRUE), + mean(voldf_clean$Medular.lesion.volume..absolute..cm3, na.rm = TRUE) + ), + + Avg_Lesion_Volume_Normalized = c( + mean(voldf_clean$Periventricular.lesion.volume..normalized..., na.rm = TRUE), + mean(voldf_clean$Deep.white.lesion.volume..normalized..., na.rm = TRUE), + mean(voldf_clean$Juxtacortical.lesion.volume..normalized..., na.rm = TRUE), + mean(voldf_clean$Infratentorial.lesion.volume..normalized..., na.rm = TRUE), + mean(voldf_clean$Cerebellar.lesion.volume..normalized..., na.rm = TRUE), + mean(voldf_clean$Medular.lesion.volume..normalized..., na.rm = TRUE) + ) +) + +# Sort by average lesion count and/or average absolute lesion volume +lesion_avg_summary_sorted <- lesion_avg_summary[order(-lesion_avg_summary$Avg_Lesion_Count, -lesion_avg_summary$Avg_Lesion_Volume_Absolute), ] + +# Print the sorted summary +print(lesion_avg_summary_sorted) + +