Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

See vol brain #119

Merged
merged 3 commits into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,5 @@ conda-pkg/meta.yaml
!tests/sample_synthetic_data/*.csv
secret_data/*
secret_data/
tmp_dcm2bids/*
tmp_dcm2bids/*
.Rproj.user
13 changes: 13 additions & 0 deletions brickstudy.Rproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX
70 changes: 59 additions & 11 deletions notebooks/Concatenate_volbrain_csvs_deeplesion.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
"import zipfile\n",
"import os\n",
"from zipfile import ZipFile\n",
"import glob"
"import glob\n",
"from io import BytesIO"
]
},
{
Expand All @@ -41,15 +42,19 @@
},
"outputs": [],
"source": [
"#navigate to zipfolders and intended folder for output\n",
"path_volbrain = 'Z:/VolBrain'\n",
"output_folder = \"Z:/VolBrain/Separate_CSV_Deeplesion\"\n"
"#navigate to zipfolders and intended folder for output (change to commented out if on Windows)\n",
"# path_volbrain = 'Z:/processed_data/VolBrain'\n",
"# output_folder = \"Z:/processed_data/VolBrain/Separate_CSV_Deeplesion\"\n",
"path_volbrain = '/mnt/data/processed_data/VolBrain'\n",
"output_folder = \"/mnt/data/processed_data/VolBrain/Separate_CSV_Deeplesion\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#create a list to store the CSV headers in\n",
Expand All @@ -65,8 +70,6 @@
"outputs": [],
"source": [
"dataframes = []\n",
"from io import BytesIO\n",
"\n",
"\n",
"for zip_file in glob.glob(os.path.join(path_volbrain, '*.zip')):\n",
" # split by underscores\n",
Expand All @@ -79,7 +82,7 @@
" if entry_name.endswith(\".csv\"):\n",
" entry = zf.read(entry_name)\n",
" df = pd.read_csv(BytesIO(entry), sep=';')\n",
" df['Paricipant Id'] = participant_id\n",
" df['Participant Id'] = participant_id\n",
" dataframes.append(df)"
]
},
Expand All @@ -103,7 +106,8 @@
},
"outputs": [],
"source": [
"concat_df.to_csv('../secret_data/volbrains.csv')"
"#move the participant id row to the front. Now it's last. This is important for upload in castor.\n",
"concat_df = concat_df.loc[:, [concat_df.columns[-1]] + list(concat_df.columns[:-1])]\n"
]
},
{
Expand All @@ -113,14 +117,58 @@
"tags": []
},
"outputs": [],
"source": []
"source": [
"concat_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#Now make the column names castor appropriate. Do not change the \"Participant Id\" column of course. dl stands for DeepLesion algorithm\n",
"concat_df.columns = [\n",
" col if col == \"Participant Id\" else \"dl_\" + col.replace(\" \", \"_\") + \"_T0\"\n",
" for col in concat_df.columns\n",
"]\n",
"\n",
"#For castor, male is 1 and female is 2. Replace these values\n",
"concat_df[\"dl_Sex_T0\"] = concat_df[\"dl_Sex_T0\"].replace({\"Female\": 2, \"Male\": 1})\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"concat_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# concat_df.to_csv('Z:/castor_proof_files/volbrains_castor.csv')\n",
"#(change to commented out if on Windows)\n",
"concat_df.to_csv('/mnt/data/castor_proof_files/volbrains_castor.csv')"
]
}
],
"metadata": {
Expand All @@ -139,7 +187,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
204 changes: 204 additions & 0 deletions notebooks/experi/.~WISC_convertR.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "0",
"metadata": {},
"outputs": [],
"source": [
"#Laad de benodigde libraries\n",
"library(readxl) # Load the package into your R session\n",
"library(writexl)\n",
"library(dplyr)\n",
" #Importeer excel file van export gemstracker WISCV\n",
" # Replace \"data.xlsx\" with the actual file name and path if it's located in a different directory\n",
" WISCV_gemstracker <- read.csv(\"../../secret_data/WISC_V_BRICK_T0dd08042024.csv\", sep=\";\")\n",
" \n",
" #show column names of the new df\n",
" print(colnames(WISCV_gemstracker))\n",
"\n",
" #verander de column names van gemstracker naar castor, voor Baseline.\n",
"\n",
"#head(WISCV_gemstracker)\n",
"\n",
" # Change column names\n",
" colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"gr2o_patient_nr\"] <- \"Participant Id\"\n",
" colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"DatumWISCV\"] <- \"Datum_WISC_V\"\n",
" colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"StartWISCV\"] <- \"Start_WISC_V\"\n",
" colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"StopWISCV\"] <- \"Stop_WISC_V\"\n",
" colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"WISCVVolt\"] <- \"WISC_V_voltooid\"\n",
" colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"VolgordeWISC\"] <- \"Volgorde_NPO_3\"\n",
" colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"AfnemerWISCV\"] <- \"Afnemer_WISC_V\"\n",
" colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"WISCVOpm\"] <- \"Opmerkingen_WISC_V\"\n",
" colnames(WISCV_gemstracker)[colnames(WISCV_gemstracker) == \"WISCVOpmUit\"] <- \"Uitleg_Opmerkingen_WISC_V\"\n",
" \n",
" \n",
"\n",
"#head(WISCV_gemstracker)\n",
"\n",
" #voeg extra kolom toe om alleen de verplichte BRICK-waarden te exporteren naar castor en niet de hele WISCV. Als je toch alle velden wil invullen, kan\n",
" #je of alle velden een \"o\" geven, of dit handmatig wijzigen per participant, in de excel die op het einde gegenereerd wordt\n",
"\n",
" WISCV_gemstracker$BRICK_of_uitgebreid <- 1\n",
" \n",
" #De volgende kolommen die wel in de Castor-export van de WICV staan, voegen we niet toe: Participant status, site abbreviation en participation creation date. \n",
" #Dit zal niet zorgen voor problemen, zolang de participanten al vóór de Gemstracker-import zijn aangemaakt in Castor. We gaan niet via deze weg nieuwe patienten importeren. Dit kan wel, maar dan heb je wel deze kolommen nodig.\n",
"\n",
" #Waarden in de kolommen aanpassen op Castor Format\n",
" \n",
" #1.Te beginnen met de datum:\n",
" \n",
" # Convert the column \"Datum_WISC_V\" to Date format\n",
" WISCV_gemstracker$Datum_WISC_V <- as.Date(WISCV_gemstracker$Datum_WISC_V, format = \"%Y-%m-%d\")\n",
" \n",
" # Change the date format to \"02-08-2023\" in the same column\n",
" WISCV_gemstracker$Datum_WISC_V <- format(WISCV_gemstracker$Datum_WISC_V, \"%d-%m-%Y\")\n",
" \n",
" \n",
"\n",
"#head(WISCV_gemstracker)\n",
"\n",
" #2.Hierbij veranderen we de afnemer meerkeuze kolom naar een kolom met 1 waarde in de Castor dataframe (\"Afnemer_WISC_V_W\")\n",
" # Let op! Dit moet aangepast worden als er meer afnemers bij komen, maar dan verandert de 6 in een 7 etc. Goed opletten als de labelsets worden aangepast in Castor en LS!\n",
" \n",
" # Convert \"AfnemerWISCV_SQ000\" to \"AfnemerWISCV_SQ006\" columns to numeric values\n",
" # Convert \"AfnemerWISCV_SQ000\" to \"AfnemerWISCV_SQ006\" columns to numeric values\n",
" for (i in 0:6) {\n",
" column_name <- paste(\"AfnemerWISCV_SQ00\", i, sep = \"\")\n",
" WISCV_gemstracker$Afnemer_WISC_V[WISCV_gemstracker[column_name] == \"Y\"] <- i\n",
" }\n",
"\n",
"#sapply(WISCV_gemstracker, class)\n",
"\n",
"#head(WISCV_gemstracker)\n",
"\n",
" \n",
" #3. Hier veranderen we de waarden in de volgorde_NPO kolom voor in Castor\n",
" \n",
" # Convert \"VolgordeWISC_SQ001\" to \"VolgordeWISC_SQ004\" columns to numeric values\n",
" for (i in 1:4) {\n",
" col_name <- paste(\"VolgordeWISC_SQ00\", i, sep = \"\")\n",
" WISCV_gemstracker$Volgorde_NPO_3[WISCV_gemstracker[, col_name] == \"Y\"] <- i\n",
" }\n",
" \n",
"\n",
" #4. Alle waarden onder kolom: Opmerkingen_WISC_V en WISC_V_voltooid gaan van Y naar 1 en van N naar 0\n",
" \n",
" # Convert \"Y\" to 1 and \"N\" to 0 in the \"Opmerkingen_WISC_V\" column\n",
" WISCV_gemstracker$Opmerkingen_WISC_V[WISCV_gemstracker$Opmerkingen_WISC_V == \"Y\"] <- 1\n",
" WISCV_gemstracker$Opmerkingen_WISC_V[WISCV_gemstracker$Opmerkingen_WISC_V == \"N\"] <- 0\n",
" \n",
"\n",
" WISCV_gemstracker$WISC_V_voltooid[WISCV_gemstracker$WISC_V_voltooid == \"Y\"] <- 1\n",
" WISCV_gemstracker$WISC_V_voltooid[WISCV_gemstracker$WISC_V_voltooid == \"N\"] <- 0\n",
" \n",
"\n",
" ## Nog te testen: Haal het uur en de minuten uit twee afzonderlijke kolommen en zet ze samen in de start kolom.\n",
" # Convert numeric columns to characters\n",
" WISCV_gemstracker$StartWISCV_SQ001 <- as.character(WISCV_gemstracker$StartWISCV_SQ001)\n",
" WISCV_gemstracker$StartWISCV_SQ002 <- as.character(WISCV_gemstracker$StartWISCV_SQ002)\n",
" \n",
"\n",
" # Create Start_WISC_V column\n",
" WISCV_gemstracker$Start_WISC_V <- paste(WISCV_gemstracker$StartWISCV_SQ001, WISCV_gemstracker$StartWISCV_SQ002, sep = \":\")\n",
" \n",
" # Repeat the same process for Stop columns\n",
" WISCV_gemstracker$StopWISCV_SQ001 <- as.character(WISCV_gemstracker$StopWISCV_SQ001)\n",
" WISCV_gemstracker$StopWISCV_SQ002 <- as.character(WISCV_gemstracker$StopWISCV_SQ002)\n",
" \n",
"\n",
" # Create Stop_WISC_V column\n",
" WISCV_gemstracker$Stop_WISC_V <- paste(WISCV_gemstracker$StopWISCV_SQ001, WISCV_gemstracker$StopWISCV_SQ002, sep = \":\")\n",
"\n",
" #Volgorde kolommen aanpassen\n",
" WISCV_gemstracker <- WISCV_gemstracker %>%\n",
" select(\"Participant Id\", \"BRICK_of_uitgebreid\", \"Datum_WISC_V\", \"Start_WISC_V\", \n",
" \"Stop_WISC_V\", \"Volgorde_NPO_3\", \"WISC_V_voltooid\", \n",
" \"Opmerkingen_WISC_V\", \"Uitleg_Opmerkingen_WISC_V\", everything())\n",
" \n",
" \n",
"\n",
"#length(colnames(WISCV_gemstracker))\n",
"\n",
" #Elke field name is uniek. In de baseline meting, hebben bijn alle velden in Castor een _1. In FU1 en FU2 zal dit zeker _2 en _3 worden\n",
" #behalve participant id en \"Volgorde_NPO_3\", moeten alle kolommen eraan geloven.\n",
"\n",
" # # Create a list to hold the new column names\n",
" # new_column_names <- vector(\"character\", length(names(WISCV_gemstracker)))\n",
" \n",
" #Delete de kolommen uit de gemstracker export die je niet nodig hebt (dplyr)\n",
"\n",
" # List of columns to be removed\n",
" columns_to_remove <- c(\"respondentid\", \"organizationid\", \"gto_id_relation\", \"forgroup\", \n",
" \"consentcode\", \"resptrackid\", \"gto_round_order\", \"gto_round_description\", \n",
" \"gtr_track_name\", \"gr2t_track_info\", \"gto_completion_time\", \"gto_start_time\", \n",
" \"gto_valid_from\", \"gto_valid_until\", \"startlanguage\", \"lastpage\", \n",
" \"gto_id_token\", \"surveyversion\", \"AfnemerWISCV_SQ000\", \"AfnemerWISCV_SQ001\", \n",
" \"AfnemerWISCV_SQ002\", \"AfnemerWISCV_SQ003\", \"AfnemerWISCV_SQ004\", \n",
" \"AfnemerWISCV_SQ005\", \"AfnemerWISCV_SQ006\", \n",
" \"VolgordeWISC_SQ001\", \"VolgordeWISC_SQ002\", \"VolgordeWISC_SQ003\", \n",
" \"VolgordeWISC_SQ004\", \"Sub\", \"StartWISCV_SQ001\", \"StartWISCV_SQ002\", \"StopWISCV_SQ001\", \"StopWISCV_SQ002\")\n",
" \n",
" # Remove the specified columns\n",
" WISCV_gemstracker <- WISCV_gemstracker %>%\n",
" select(-one_of(columns_to_remove))\n",
"\n",
"# Create a list to hold the new column names\n",
"new_column_names <- vector(\"character\", length(names(WISCV_gemstracker)))\n",
" \n",
" \n",
" # Iterate through each column name\n",
" for (i in seq_along(names(WISCV_gemstracker))) {\n",
" if (names(WISCV_gemstracker)[i] != \"Participant Id\" && names(WISCV_gemstracker)[i] != \"Volgorde_NPO_3\") {\n",
" new_column_names[i] <- paste(names(WISCV_gemstracker)[i], \"_1\", sep = \"\")\n",
" } else {\n",
" new_column_names[i] <- names(WISCV_gemstracker)[i]\n",
" }\n",
" }\n",
" \n",
" # Assign the new column names to the dataframe\n",
" names(WISCV_gemstracker) <- new_column_names\n",
" \n",
" \n",
" \n",
" # Print the updated column names\n",
" print(names(WISCV_gemstracker))\n",
" \n",
" #exporteer nieuwe df naar excel file\n",
" write_xlsx(WISCV_gemstracker, path = \"WISCV_gemstracker_poging_makeda.xlsx\")\n",
" \n",
" # Export to CSV\n",
" write.csv(WISCV_gemstracker, file = \"WISCV_gemstracker_poging_makeda_csv.csv\", row.names = FALSE)\n",
" \n",
" \n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "R",
"language": "R",
"name": "ir"
},
"language_info": {
"codemirror_mode": "r",
"file_extension": ".r",
"mimetype": "text/x-r-source",
"name": "R",
"pygments_lexer": "r",
"version": "4.1.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading
Loading