-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathpyrad_trim.py
executable file
·58 lines (52 loc) · 1.95 KB
/
pyrad_trim.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python
"""
Trims sequence length in PyRAD/ipyrad `.alleles` or `.loci` file.
"""
import sys
import argparse
__author__ = 'Pim Bongaerts'
__copyright__ = 'Copyright (C) 2016 Pim Bongaerts'
__license__ = 'GPL'
def determine_seq_start(line):
""" Determine the start position of the sequence """
sample, seq = line.split()
seq_start = line.find(seq)
if seq_start < 0:
sys.exit('Unexpected error')
else:
return seq_start
def main(pyrad_filename, seq_length):
# Open input file
pyrad_file = open(pyrad_filename, 'r')
# Iterate through file and trim sequences
seq_start = 0
for line in pyrad_file:
if line[0] == '/':
# Extract locus info and number
cols = line.strip().split('|')
locus_info = cols[0]
locus_nr = cols[1]
# Trim locus info
locus_info_trim = locus_info[0:output_line_length]
print('{0}|{1}'.format(locus_info_trim, locus_nr))
else:
# Determine delimiter if not yet set (first line)
if seq_start == 0:
seq_start = determine_seq_start(line)
# Extract sample name and sequence
sample, seq = line.strip().split()
# Trim sequence and determine new line length
trim_seq = seq[0:int(seq_length)]
# Output trimmed sequence
output_line = '{0}{1}'.format(sample.ljust(seq_start), trim_seq)
print(output_line)
output_line_length = len(output_line)
pyrad_file.close()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('pyrad_filename', metavar='pyrad_file',
help='PyRAD allele file (`.loci` or `.allele`)')
parser.add_argument('seq_length', type=int,
help='length to which sequences are trimmed')
args = parser.parse_args()
main(args.pyrad_filename, args.seq_length)