From 9f10a0344ba3ae823beaee9e121a532d31d57bd0 Mon Sep 17 00:00:00 2001 From: GinnyAquarius Date: Fri, 10 Nov 2017 10:58:26 +0700 Subject: [PATCH] read/write --- src/hash_align.c | 43 ++++++++++++++++++ src/main.c | 112 ++++------------------------------------------- 2 files changed, 52 insertions(+), 103 deletions(-) diff --git a/src/hash_align.c b/src/hash_align.c index 7d00552..ffe886d 100644 --- a/src/hash_align.c +++ b/src/hash_align.c @@ -2050,6 +2050,30 @@ void get_alignment_pair(char *fq1, char *fq2) gzclose(input2); kseq_destroy(R1); kseq_destroy(R2); + + MEAN_LEN /= 2 * N_READ; + MEAN_FRAG = MEAN_FRAG / PAIRED + MEAN_LEN; + for (i = 0; i < REF_INF->n; ++i) { + if (REF_INF->len[i] < MEAN_FRAG) + REF_INF->eff_len[i] = 0; + else + REF_INF->eff_len[i] = REF_INF->len[i] - MEAN_FRAG + 1; + } + + printf("\rNumber of aligned pairs\t: %u/%u\n", MAPPED, N_READ); + printf("Mean read length\t: %f\n", MEAN_LEN); + printf("Mean fragment length\t: %f\n", MEAN_FRAG); + + fprintf(SUMMARY, "\nMAPPING RESULT:\n"); + fprintf(SUMMARY, "\tTotal number of read pairs\t\t\t: %u\n", N_READ); + fprintf(SUMMARY, "\t\t- Number of proper mapped pairs\t: %u (%f)\n", + PAIRED, (float) PAIRED/N_READ); + fprintf(SUMMARY, "\t\t- Number of unproper mapped pairs\t: %u (%f)\n", + MAPPED - PAIRED, (float)(MAPPED - PAIRED)/N_READ); + fprintf(SUMMARY, "\t\t- Number of unmapped pairs\t\t: %u (%f)\n", + N_READ - MAPPED, (float) (N_READ - MAPPED)/N_READ); + fprintf(SUMMARY, "\tMean read length\t\t\t: %f\n", MEAN_LEN); + fprintf(SUMMARY, "\tMean fragment length\t\t\t: %f\n", MEAN_FRAG); } void get_alignment(char *fq) @@ -2081,4 +2105,23 @@ void get_alignment(char *fq) gzclose(input); kseq_destroy(R1); + + MEAN_LEN /= N_READ; + MEAN_FRAG = MEAN_LEN; + for (i = 0; i < REF_INF->n; ++i) { + if (REF_INF->len[i] < MEAN_FRAG) + REF_INF->eff_len[i] = 0; + else + REF_INF->eff_len[i] = REF_INF->len[i] - MEAN_FRAG + 1; + } + printf("\rNumber of aligned reads\t: %u/%u\n", PAIRED, N_READ); + printf("Mean read lenght\t: %f\n", MEAN_LEN); + + fprintf(SUMMARY, "\nMAPPING RESULT:\n"); + fprintf(SUMMARY, "\tTotal number of reads\t\t\t: %u\n", N_READ); + fprintf(SUMMARY, "\t\t- Number of mapped reads\t: %u (%f)\n", + PAIRED, (float) PAIRED/N_READ); + fprintf(SUMMARY, "\t\t- Number of unmapped reads\t: %u (%f)\n", + N_READ - PAIRED, (float) (N_READ - PAIRED)/N_READ); + fprintf(SUMMARY, "\tMean read length\t\t\t: %f\n", MEAN_LEN); } diff --git a/src/main.c b/src/main.c index 1a6dba1..e8b6e26 100644 --- a/src/main.c +++ b/src/main.c @@ -5,8 +5,6 @@ void print_usage() { fprintf(stdout, "VERSION:\n\t%s\n", VERSION); fprintf(stdout, "QUANTILITY:\n\t ./hera quant -i path/to/index_directory [options] \n"); - fprintf(stdout, "\t\t-1:\t Input left read-files, separated by comma\n"); - fprintf(stdout, "\t\t-2:\t Input right read-files, separated by comma (optional)\n"); fprintf(stdout, "\t\t-o:\t Output directory (default: ./)\n"); fprintf(stdout, "\t\t-t:\t Number of threads (default: 1)\n"); fprintf(stdout, "\t\t-z:\t Compress level (1 - 9) (default: -1)\n"); @@ -49,106 +47,16 @@ void index_ref(char *transcriptome, char *genome, char *out_file, int full) } } -void get_file_name(char *file_list, char *file_name, unsigned int *idx) -{ - unsigned int i = *idx; - while (file_list[*idx] != ',' && file_list[*idx] != '\0') - ++*idx; - memcpy(file_name, file_list + i, *idx - i); - file_name[*idx - i] = '\0'; -} - -void process_read_files(char *left_read, char *right_read) -{ - char left_file[1000], right_file[1000]; - unsigned int i, l_len, r_len; - - l_len = r_len = 0; - - while (1){ - get_file_name(left_read, left_file, &l_len); - get_file_name(right_read, right_file, &r_len); - get_alignment_pair(left_file, right_file); - if (left_read[l_len] == '\0' || right_read[r_len] == '\0') - break; - ++l_len; - ++r_len; - } - - MEAN_LEN /= 2 * N_READ; - MEAN_FRAG = MEAN_FRAG / PAIRED + MEAN_LEN; - for (i = 0; i < REF_INF->n; ++i) { - if (REF_INF->len[i] < MEAN_FRAG) - REF_INF->eff_len[i] = 0; - else - REF_INF->eff_len[i] = REF_INF->len[i] - MEAN_FRAG + 1; - } - fprintf(stdout, "\rNumber of aligned pairs\t: %u/%u\n", MAPPED, N_READ); - fprintf(stdout, "Mean read length\t: %f\n", MEAN_LEN); - fprintf(stdout, "Mean fragment length\t: %f\n", MEAN_FRAG); - - fprintf(SUMMARY, "\nMAPPING RESULT:\n"); - fprintf(SUMMARY, "\tTotal number of read pairs\t\t\t: %u\n", N_READ); - fprintf(SUMMARY, "\t\t- Number of proper mapped pairs\t: %u (%f)\n", PAIRED, (float) PAIRED/N_READ); - fprintf(SUMMARY, "\t\t- Number of unproper mapped pairs\t: %u (%f)\n", - MAPPED - PAIRED, (float)(MAPPED - PAIRED)/N_READ); - fprintf(SUMMARY, "\t\t- Number of unmapped pairs\t\t: %u (%f)\n", - N_READ - MAPPED, (float) (N_READ - MAPPED)/N_READ); - fprintf(SUMMARY, "\tMean read length\t\t\t: %f\n", MEAN_LEN); - fprintf(SUMMARY, "\tMean fragment length\t\t\t: %f\n", MEAN_FRAG); -} - -void process_read_file(char *left_read) -{ - char left_file[1000]; - unsigned int i, l_len = 0; - while (1){ - get_file_name(left_read, left_file, &l_len); - get_alignment(left_file); - if (left_read[l_len] == '\0') - break; - - ++l_len; - } - - MEAN_LEN /= N_READ; - MEAN_FRAG = MEAN_LEN; - for (i = 0; i < REF_INF->n; ++i) { - if (REF_INF->len[i] < MEAN_FRAG) - REF_INF->eff_len[i] = 0; - else - REF_INF->eff_len[i] = REF_INF->len[i] - MEAN_FRAG + 1; - } - fprintf(stdout, "\rNumber of aligned reads\t: %u/%u\n", PAIRED, N_READ); - fprintf(stdout, "Mean read lenght\t: %f\n", MEAN_LEN); - - fprintf(SUMMARY, "\nMAPPING RESULT:\n"); - fprintf(SUMMARY, "\tTotal number of reads\t\t\t: %u\n", N_READ); - fprintf(SUMMARY, "\t\t- Number of mapped reads\t: %u (%f)\n", - PAIRED, (float) PAIRED/N_READ); - fprintf(SUMMARY, "\t\t- Number of unmapped reads\t: %u (%f)\n", - N_READ - PAIRED, (float) (N_READ - PAIRED)/N_READ); - fprintf(SUMMARY, "\tMean read length\t\t\t: %f\n", MEAN_LEN); -} - void quantility(int argc, char *argv[]) { - char *idx_dir, *left_read, *right_read; - char *out_dir = "./", *genome = NULL, *prefix = "\0"; + char *idx_dir, *out_dir = "./", *genome = NULL, *prefix = "\0"; EM_val *em_val; int c, temp_nthread, temp_writebam, temp_nbs; unsigned int n_bs = 0; unsigned long i_start = 0; // Get parameter - left_read = right_read = NULL; - while ((c = getopt(argc - 1, argv + 1, "1:2:t:i:o:b:z:h:w:f:p:")) != -1) { + while ((c = getopt(argc - 1, argv + 1, "t:i:o:b:z:h:w:f:p:")) != -1) { switch (c) { - case '1': - left_read = optarg; - break; - case '2': - right_read = optarg; - break; case 't': temp_nthread = atoi(optarg); if (temp_nthread <= 0) { @@ -209,16 +117,14 @@ void quantility(int argc, char *argv[]) { init_output(idx_dir, out_dir, argc, argv, prefix); //Get alignment - if (left_read != NULL && right_read != NULL) { - fprintf(stdout, "Process paired-end reads in:\n\t 1. %s\n\t 2. %s\n", - left_read, right_read); - process_read_files(left_read, right_read); - } else if (left_read != NULL) { - fprintf(stdout, "Process single-end reads in: %s\n", left_read); - process_read_file(left_read); + if (optind + 2 < argc) { + DEBUG_PRINT("Process paired-end reads in:\n\t 1. %s\n\t 2. %s\n", + argv[optind + 1], argv[optind + 2]); + get_alignment_pair(argv[optind + 1], argv[optind + 2]); } else { - fprintf(stdout, "Input file is not defined\n"); - print_usage(); + DEBUG_PRINT("Process single-end reads in: %s\n", + argv[optind + 1]); + get_alignment(argv[optind + 1]); } // Flush and end file