-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathblat.xml
163 lines (148 loc) · 7.87 KB
/
blat.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
<?xml version="1.0"?>
<tool id="ucsc_blat" name="UCSC BLAT Alignment Tool" version="1.2">
<description>Standalone blat sequence search command line tool</description>
<!-- Software dependency resolved by Galaxy before the command runs. -->
<requirements>
    <requirement version="1.0" type="package">ucsc_tools_340_for_BLAT</requirement>
</requirements>
<command detect_errors="exit_code"><![CDATA[
    ## ------------------------------------------------------------------
    ## Stage the inputs.
    ## blat chooses its reader from the file name extension, so datasets
    ## coming from the history are symlinked under a name that matches
    ## their Galaxy datatype (twobit -> .2bit, anything else -> .fa).
    ## ------------------------------------------------------------------
    #if str($reference_source.reference_source_selector) == "history":
        #if $reference_source.database.is_of_type("twobit"):
            #set $reference_fasta_filename = "localref.2bit"
        #else:
            #set $reference_fasta_filename = "localref.fa"
        #end if
        ln -s '${reference_source.database}' '${reference_fasta_filename}' &&
    #else:
        ## Cached databases are used in place via the path column of the data table.
        #set $reference_fasta_filename = str($reference_source.database.fields.path)
    #end if

    #if $query.is_of_type("twobit"):
        #set $query_filename = "query.2bit"
    #else:
        #set $query_filename = "query.fa"
    #end if
    ln -s '${query}' '${query_filename}' &&

    ## ------------------------------------------------------------------
    ## Run blat; raw alignments go to the scratch file "output".
    ## ------------------------------------------------------------------
    blat
    #if str($noHead) == "yes":
        -noHead
    #end if
    -q=$query_type
    -t=$database_type
    #if str($mask_type.mask) == "file.out":
        ## A user supplied RepeatMasker file: quote the path for the shell.
        -mask='${mask_type.mask_file}'
    #else:
        -mask=$mask_type.mask
    #end if
    '$reference_fasta_filename'
    '$query_filename'
    output

    ## ------------------------------------------------------------------
    ## Sort alignments by column 10, then numerically by column 12.
    ## When the PSL header was kept, its 5 lines are copied through first
    ## so that they are not shuffled in among the alignment rows.
    ## ------------------------------------------------------------------
    #if str($noHead) == "yes":
        && sort -k 10,10 -k 12,12n output > '${output_sorted}'
    #else:
        && head -n 5 output > '${output_sorted}'
        && tail -n +6 output | sort -k 10,10 -k 12,12n >> '${output_sorted}'
    #end if
]]></command>
<inputs>
    <!-- Database: either a server-side FASTA from the all_fasta data table or a history dataset. -->
    <conditional name="reference_source">
        <param name="reference_source_selector" type="select" label="Choose the source for the database">
            <option value="cached">Locally cached</option>
            <option value="history">History</option>
        </param>
        <when value="cached">
            <param name="database" type="select" label="Select database">
                <options from_data_table="all_fasta">
                    <filter type="sort_by" column="2" />
                </options>
                <validator type="no_options" message="A built-in database is not available" />
            </param>
        </when>
        <when value="history">
            <param name="database" type="data" format="fasta,twobit" label="Using database file, either a .fa or .2bit file" />
        </when>
    </conditional>

    <!-- Query sequences to align against the database. -->
    <param name="query" type="data" format="fasta,twobit" label="Query data, either a .fa or .2bit file" />

    <!-- Sequence types passed to blat as -t (database) and -q (query). -->
    <param name="database_type" argument="-t" type="select" label="database type" help="Choose your database type, the default is dnax">
        <option value="dna">dna - DNA sequence</option>
        <option value="prot">prot - protein sequence</option>
        <option value="dnax" selected="true">dnax - DNA sequence translated in six frames to protein</option>
    </param>
    <param name="query_type" argument="-q" type="select" label="query type" help="Choose your query type, the default is rnax">
        <option value="dna">dna - DNA sequence </option>
        <option value="rna">rna - RNA sequence</option>
        <option value="prot">prot - protein sequence</option>
        <option value="dnax">dnax - DNA sequence translated in six frames to protein</option>
        <option value="rnax" selected="true">rnax - DNA sequence translated in three frames to protein</option>
    </param>

    <!-- "yes" adds -noHead to the blat command line. -->
    <param name="noHead" type="select" label="Suppress the .psl header (so it's just a tab-separated file)">
        <option value="yes" selected="true">Yes, suppress the .psl header</option>
        <option value="no">No, do not suppress the .psl header</option>
    </param>

    <!-- Repeat masking mode; only "file.out" needs an additional dataset. -->
    <conditional name="mask_type">
        <param name="mask" argument="-mask" type="select" label="Mask out repeats" help="Alignments won't be started in masked region but may extend through it in nucleotide searches. Masked areas are ignored entirely in protein or translated searches. Default is lower">
            <option value="lower" selected="true">lower - mask out lower-cased sequence</option>
            <option value="upper">upper - mask out upper-cased sequence</option>
            <option value="out">out - mask according to database.out RepeatMasker .out file</option>
            <option value="file.out">file.out - mask database according to RepeatMasker file.out</option>
        </param>
        <when value="lower" />
        <when value="upper" />
        <when value="out" />
        <when value="file.out">
            <param name="mask_file" type="data" format="txt" label="RepeatMasker file.out" />
        </when>
    </conditional>
</inputs>
<!-- Single result: the blat alignments after sorting, written as PSL. -->
<outputs>
    <data name="output_sorted" format="psl" label="${tool.name} on ${on_string}" />
</outputs>
<tests>
    <!-- Query: GenBank RefSeq records for Gallus gallus; database: Amazona vittata (FASTA from history). -->
    <test>
        <conditional name="reference_source">
            <param name="reference_source_selector" value="history" />
            <param name="database" value="amaVit1_Gallus/amaVit1.fa" />
        </conditional>
        <param name="query" value="amaVit1_Gallus/Gallus_gallus_RefSeq.fa" />
        <param name="database_type" value="dnax" />
        <param name="query_type" value="rnax" />
        <param name="noHead" value="yes" />
        <conditional name="mask_type">
            <param name="mask" value="lower" />
        </conditional>
        <output name="output_sorted" value="amaVit1_Gallus/amaVit1_Gallus_gallus.psl" />
    </test>
    <!-- Query: partial mRNA of Drosophila melanogaster; database: Drosophila biarmipes dot chromosome. -->
    <test>
        <conditional name="reference_source">
            <param name="reference_source_selector" value="history" />
            <param name="database" value="dbia3/dbia3.fa" />
        </conditional>
        <param name="query" value="dbia3/dmel-transcript.fa" />
        <param name="database_type" value="dnax" />
        <param name="query_type" value="rnax" />
        <param name="noHead" value="yes" />
        <conditional name="mask_type">
            <param name="mask" value="lower" />
        </conditional>
        <output name="output_sorted" value="dbia3/dbia3.sorted.psl" />
    </test>
    <!-- Same query against the .2bit database, masked with a RepeatMasker .out file. -->
    <test>
        <conditional name="reference_source">
            <param name="reference_source_selector" value="history" />
            <param name="database" value="dbia3/dbia3_masked.2bit" />
        </conditional>
        <param name="query" value="dbia3/dmel-transcript.fa" />
        <param name="database_type" value="dnax" />
        <param name="query_type" value="rnax" />
        <param name="noHead" value="yes" />
        <conditional name="mask_type">
            <param name="mask" value="file.out" />
            <param name="mask_file" value="dbia3/dbia3_RM.out" />
        </conditional>
        <output name="output_sorted" value="dbia3/dbia3_masked.sorted.psl" />
    </test>
</tests>
<help>
<![CDATA[
BLAT
====
BLAT is a bioinformatics tool that performs rapid mRNA/DNA and cross-species protein alignments.
blat (version: v340)- Standalone blat sequence search command line tool.
-------------------------------------------------------------------------
usage:
++++++
$ blat database query [-ooc=11.ooc] output.psl
where:
database and query are each either a .fa, .nib or .2bit file,
or a list of these files with one file name per line.
-ooc=11.ooc tells the program to load over-occurring 11-mers from
an external file. This will increase the speed
by a factor of 40 in many cases, but is not required.
output.psl is the name of the output file.
documentation:
++++++++++++++
See Blat documentation (http://genome.ucsc.edu/goldenPath/help/blatSpec.html)
Source code:
++++++++++++
http://hgdownload.cse.ucsc.edu/admin/exe/
]]></help>
<!-- DOI of the publication users are asked to cite when they use this tool. -->
<citations>
<citation type="doi">10.1101/gr.229202</citation>
</citations>
</tool>