-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenemodel_ensembl.config.default
176 lines (148 loc) · 5.16 KB
/
genemodel_ensembl.config.default
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#format: sh
#
# genemodel_ensembl.config.default
#
# This file sets up the environment variables that are needed by the
# gene model QC reports and loader that are specific to Ensembl.
#
###########################################################################
#
# Location of the test TR directory.
#
TRDIR='/mgi/all/wts2_projects/1500/WTS2-1591/ENSEMBL'
export TRDIR
#
# Names of the Ensembl download files. These are used by other wrappers
# in the bin directory: copydownloads.sh, copyinputs.sh, runTest.sh
#
BIOTYPE_FILE_NAME='Mus_musculus.GRCm39.113.gtf.gz'
export BIOTYPE_FILE_NAME

TRANSCRIPT_FILE_NAME='Mus_musculus.GRCm39.cdna.all.fa.gz'
export TRANSCRIPT_FILE_NAME

NCRNA_FILE_NAME='Mus_musculus.GRCm39.ncrna.fa.gz'
export NCRNA_FILE_NAME

PROTEIN_FILE_NAME='Mus_musculus.GRCm39.pep.all.fa.gz'
export PROTEIN_FILE_NAME
#
# Should the gene models be reloaded (true/false)? When set to false,
# only the associations will be deleted/reloaded.
#
RELOAD_GENEMODELS='false'
export RELOAD_GENEMODELS
# Default biotype file (full path, under INPUTDIR).
# It is used to build the gene model ID to biotype mapping for
# createSeqGeneTrapInput.py
#
BIOTYPE_FILE_DEFAULT="${INPUTDIR}/ensembl_biotypes.gz"
export BIOTYPE_FILE_DEFAULT
# Default transcript file (full path, under INPUTDIR).
# Used by ensemblseqload
#
TRANSCRIPT_FILE_DEFAULT="${INPUTDIR}/ensembl_transcripts.gz"
export TRANSCRIPT_FILE_DEFAULT

# Current transcript file (full path): TRANSCRIPT_FILE_NAME under
# TRANSCRIPTDIR.
#
TRANSCRIPT_FILE_CURRENT="${TRANSCRIPTDIR}/${TRANSCRIPT_FILE_NAME}"
export TRANSCRIPT_FILE_CURRENT
# Default non-coding RNA transcript file (full path, under INPUTDIR).
# Used by ensemblseqload
#
NCRNA_FILE_DEFAULT="${INPUTDIR}/ensembl_ncrna.gz"
export NCRNA_FILE_DEFAULT

# Current non-coding RNA transcript file (full path): NCRNA_FILE_NAME
# under TRANSCRIPTDIR.
#
NCRNA_FILE_CURRENT="${TRANSCRIPTDIR}/${NCRNA_FILE_NAME}"
export NCRNA_FILE_CURRENT
# Default protein file (full path, under INPUTDIR).
# Used by ensemblseqload
#
PROTEIN_FILE_DEFAULT="${INPUTDIR}/ensembl_proteins.gz"
export PROTEIN_FILE_DEFAULT

# Current protein file (full path): PROTEIN_FILE_NAME under PROTEINDIR.
#
PROTEIN_FILE_CURRENT="${PROTEINDIR}/${PROTEIN_FILE_NAME}"
export PROTEIN_FILE_CURRENT
# Name of the gene model provider.
#
GM_PROVIDER='Ensembl'
export GM_PROVIDER
# Current gene model file (full path, under GENEMODELDIR).
#
GM_FILE_CURRENT="${GENEMODELDIR}/ensembl_genemodels.txt"
export GM_FILE_CURRENT

# Default gene model file and default association file (full paths,
# both under INPUTDIR).
#
GM_FILE_DEFAULT="${INPUTDIR}/ensembl_genemodels.txt"
export GM_FILE_DEFAULT

ASSOC_FILE_DEFAULT="${INPUTDIR}/ensembl_assoc.txt"
export ASSOC_FILE_DEFAULT
# "Cleaned up", QC-ready gene model and association files (full paths).
# The sanity/QC report script both creates and uses these.
#
GM_FILE_QC="${OUTPUTDIR}/ensembl_genemodels_qc.txt"
export GM_FILE_QC

ASSOC_FILE_QC="${OUTPUTDIR}/ensembl_assoc_qc.txt"
export ASSOC_FILE_QC

# Load-ready gene model and association files (full paths). The
# sanity/QC report script creates them; the assembly sequence loader
# and the association loader take them as input.
#
GM_FILE_LOAD="${OUTPUTDIR}/ensembl_genemodels_load.txt"
export GM_FILE_LOAD

ASSOC_FILE_LOAD="${OUTPUTDIR}/ensembl_assoc_load.txt"
export ASSOC_FILE_LOAD
# Sanity-check thresholds: the minimum number of lines expected in each
# input file.
#
GM_FILE_MINIMUM_SIZE='20000'
export GM_FILE_MINIMUM_SIZE

ASSOC_FILE_MINIMUM_SIZE='20000'
export ASSOC_FILE_MINIMUM_SIZE
# Logical DB written to the header of the load-ready association file.
# The value contains spaces, so it must stay quoted.
#
ASSOC_FILE_LOGICALDB='Ensembl Gene Model'
export ASSOC_FILE_LOGICALDB
# Log files (full paths, under LOGDIR).
#
GENEMODELQC_LOGFILE="${LOGDIR}/ensembl_genemodelQC.log"
export GENEMODELQC_LOGFILE

GENEMODELLOAD_LOGFILE="${LOGDIR}/ensembl_genemodelload.log"
export GENEMODELLOAD_LOGFILE
# Names of the temp tables that will be loaded from the input files.
#
GM_TEMP_TABLE='Ensembl_GM'
export GM_TEMP_TABLE

ASSOC_TEMP_TABLE='Ensembl_Assoc'
export ASSOC_TEMP_TABLE
# bcp files (full paths, under OUTPUTDIR) for loading the input files
# into the temp tables.
#
GM_FILE_BCP="${OUTPUTDIR}/ensembl_genemodels.bcp"
export GM_FILE_BCP

ASSOC_FILE_BCP="${OUTPUTDIR}/ensembl_assoc.bcp"
export ASSOC_FILE_BCP
# Sanity/QC reports (full paths, under RPTDIR).
#
GM_SANITY_RPT="${RPTDIR}/ensembl_genemodels_sanity.rpt"
export GM_SANITY_RPT

ASSOC_SANITY_RPT="${RPTDIR}/ensembl_assoc_sanity.rpt"
export ASSOC_SANITY_RPT

INVALID_MARKER_RPT="${RPTDIR}/ensembl_invalid_marker.rpt"
export INVALID_MARKER_RPT

SEC_MARKER_RPT="${RPTDIR}/ensembl_sec_marker.rpt"
export SEC_MARKER_RPT

MISSING_GMID_RPT="${RPTDIR}/ensembl_missing_gmid.rpt"
export MISSING_GMID_RPT

CHR_DISCREP_RPT="${RPTDIR}/ensembl_chr_discrep.rpt"
export CHR_DISCREP_RPT

DUP_GM_ID_RPT="${RPTDIR}/ensembl_dup_gm_id.rpt"
export DUP_GM_ID_RPT

# File listing the names of the reports that contain discrepancies for
# a given run.
RPT_NAMES_RPT="${RPTDIR}/ensembl_reportsWithDiscrepancies.rpt"
export RPT_NAMES_RPT

# File listing the names of the reports that contain warnings (report
# and load) for a given run.
WARNING_RPT_NAMES_RPT="${RPTDIR}/ensembl_reportsWithWarnings.rpt"
export WARNING_RPT_NAMES_RPT
# Configuration file name for the assemblyseqload.
#
ASSEMBLY_CONFIG='ensembl_assemblyseqload.config'
export ASSEMBLY_CONFIG
#
# Settings for seqgenemodelload.sh
#
# User key: the Ensembl Genomic Sequence Load user.
#
USERKEY='1410'
export USERKEY

# bcp file (full path, under OUTPUTDIR).
#
BCP_FILE_PATH="${OUTPUTDIR}/SEQ_GeneModel.ensembl.bcp"
export BCP_FILE_PATH

# Log file (full path, under LOGDIR).
#
SEQGENEMODELLOAD_LOGFILE="${LOGDIR}/ensembl_seqgenemodelload.log"
export SEQGENEMODELLOAD_LOGFILE

# Logical DB of the gene model sequence provider; takes whatever value
# ASSOC_FILE_LOGICALDB holds at this point.
#
PROVIDER_LOGICALDB="${ASSOC_FILE_LOGICALDB}"
export PROVIDER_LOGICALDB