-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmas.pl
198 lines (171 loc) · 6.47 KB
/
mas.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
#!/usr/bin/env perl
use strict; use warnings;
# a MIPS Assembler
# usage: ./mas.pl infile.asm > outfile.hex
# - it outputs to stdout; redirect it to a file if you need so.
# - exactly one file shall be supplied as arg.
# every line in the input file shall be either an instruction, a comment,
# an empty line (has spaces only), or a labeled instruction, which is a label
# followed by an instruction on the same line. Label-only lines are NOT well
# supported.
# an instruction shall exist in %r_instr or %i_instr.
# a label shall match /[a-zA-Z_][a-zA-Z_0-9]*/ (ie, C-like identifier),
# followed by a colon.
# License: CC0 (Public Domain)
# See the following page for updates:
# https://gist.github.com/noureddin/3c4e6eb3798fcd6388873b10d0c8fa96
# turns a list of names into a key/value list suitable for initialising a
# hash used as a set: every name is paired with undef, so membership is
# tested with `exists`, never with the value.
sub make_hash_from_keys {
    my @names = @_;
    return map { ( $_ => undef ) } @names;
}
# definitions:
# pattern (as a string, so it can be interpolated into larger regexes) that
# a label name must match.
my $LBL_REGEX = '[a-zA-Z_][a-zA-Z_0-9]*';

# mips registers, listed in register-number order (index == register number)
my @reg = qw(zero at v0 v1 a0 a1 a2 a3 t0 t1 t2 t3 t4 t5 t6 t7
             s0 s1 s2 s3 s4 s5 s6 s7 t8 t9 k0 k1 gp sp fp ra);
# maps both the numeric spelling (0..31) and the symbolic name of every
# register to its register number.
my %reg = map { $_ => $_, $reg[$_] => $_ } (0..31);

# the following are dictionaries for the supported instructions of a certain
# format. they are used for easier detecting of the instruction format.
my %r_instr = make_hash_from_keys(qw[ nop add sub sll and or ]);
my %i_instr = make_hash_from_keys(qw[ sw lw beq ]);

# opcode for the instructions
my %op = map { $_ => 0 } qw[ add sub sll nop and or ];
$op{sw}  = 43;
$op{lw}  = 35;
$op{beq} = 4;

# functcode for R-format instructions, the last field
my %fn = (
    add => 32,
    sub => 34,
    sll => 0,
    nop => 0,
    and => 36,
    or  => 37,
);

# for easier decoding, these dictionaries are introduced.
# binary:
#   for R-format instructions of the form: INSTR $2, $3, $4
#   that is assembled into: OP $3 $4 $2 0 FUNCT
my %binary = make_hash_from_keys(qw[ add sub and or ]);
# NOP:
my %nop = make_hash_from_keys(qw[ nop ]);
# shift:
#   for R-format instructions of the form: INSTR $2, $3, 4
#   that is assembled into: OP 0 $3 $2 4 FUNCT
my %shift = make_hash_from_keys(qw[ sll ]);
# immediate:
#   for I-format instructions of the form: INSTR $2, $3, 4
#   that is assembled into: OP $3 $2 4
my %immediate = make_hash_from_keys(qw[ ]);
# loadstore:
#   for I-format instructions of the form: INSTR $2, 3($4)
#   that is assembled into: OP $4 $2 3
my %loadstore = make_hash_from_keys(qw[ lw sw ]);
# binary branch:
#   for I-format instructions of the form: INSTR $2, $3, LBL
#   that is assembled into: OP $2 $3 OFFSET_OF_LBL
my %binary_branch = make_hash_from_keys(qw[ beq ]);

# global vars
my $line_number = 0;   # 1-based number of the source line being processed
my %lables = ();       # store the line number of a lable with it

# main
# the usage contract is "exactly one input file"; fail early with a usage
# message instead of trying to open an undefined name.
@ARGV == 1 or die "usage: $0 infile.asm > outfile.hex\n";
my $infile = $ARGV[0];

# three-arg open with a lexical handle: the file name is never interpreted
# as a mode or a pipe, whatever characters it contains.
open my $in, '<', $infile or die "cannot open $infile: $!\n";

# 1st pass: detect all labels
# a label only counts at the start of a line (optionally indented), which is
# also the only place assemble() strips one from. comment lines are skipped
# so that a colon inside a comment does not define a label.
# NOTE(review): labels are recorded by *source line number* and assemble_i()
# turns the difference of two line numbers into the branch displacement, so
# blank or comment lines between a branch and its target appear to skew the
# encoded offset -- confirm and consider counting instructions instead.
my $label_at_start = qr/^\s*($LBL_REGEX)\s*:/;
$line_number = 0;
while (my $line = <$in>) {
    ++$line_number;                 # line numbering starts from 1
    chomp $line;                    # trim trailing newline
    next if $line =~ /^\s*$/;       # ignore empty lines
    next if $line =~ /^\s*#/;       # ignore comments
    if ($line =~ $label_at_start) {
        my $label = $1;
        if (exists $lables{$label}) {
            warn "WARNING: redefining lable ``${label}''\n";
        }
        $lables{$label} = $line_number;
    }
}

# 2nd pass: assembling
# rewind to the 1st byte of the file; this fails on non-seekable input
# (e.g. a pipe), in which case the second pass would silently read nothing.
seek($in, 0, 0) or die "cannot rewind $infile: $!\n";
$line_number = 0;
while (my $line = <$in>) {
    ++$line_number;                 # line numbering starts from 1
    chomp $line;                    # trim trailing newline
    next if $line =~ /^\s*$/;       # ignore empty lines
    next if $line =~ /^\s*#/;       # ignore comments
    print assemble($line);          # convert the instruction to hex
}
close $in;
# functions definition
# assembles one source line.
# takes the line as a string (optionally starting with "label:") and returns
# the encoded instruction as 8 hex digits plus a newline.
# - a line that holds nothing after its label yields nothing at all.
# - an unknown mnemonic is reported with a warning and encoded as zeros (nop).
# the mnemonic decides which format-specific routine does the encoding.
sub assemble {
    my ($line) = @_;

    # drop an optional leading label and keep whatever follows it
    my ($body) = $line =~ /\s* (?: $LBL_REGEX \s* :)? \s* (.*) \s*/x;

    # label-only line: nothing to emit
    return unless $body =~ /\S/;

    # the mnemonic is the first run of non-blank characters
    my ($instr_name) = $body =~ /(\S+)/;

    return assemble_r($body, $instr_name) if exists $r_instr{$instr_name};
    return assemble_i($body, $instr_name) if exists $i_instr{$instr_name};

    # $1 still holds the mnemonic captured just above
    warn "WARNING: unrecognized instruction ``$1'' at line $line_number\n";
    return sprintf "%08x\n", 0; # nop
}
# given an R-format instruction as a string, returns its hex representation.
#   $i          - the instruction text, starting with the mnemonic
#   $instr_name - the mnemonic; selects the operand layout (%binary, %shift
#                 or %nop) and is the key into %op and %fn
# returns the 32-bit word as 8 hex digits followed by a newline.
# operands that do not parse, register names missing from %reg, and shift
# amounts above 31 are reported with a warning and encoded as zeros (nop),
# instead of being assembled from stale or undefined values.
sub assemble_r {
    # R-format: | op (6) | rs (5) | rt (5) | rd (5) | sa (5) | fn (6) |
    #           31     26 25    21 20    16 15    11 10     6 5      0
    my ($i, $instr_name) = @_;
    my $nop_word = sprintf "%08x\n", 0;

    if (exists $binary{$instr_name}) {
        # INSTR $rd, $rs, $rt
        # anchored at the start so the mnemonic is always the first word
        my ($rd, $rs, $rt) = $i =~ /\A \s* \S+ \s+ \$([a-z0-9]+) \s*, \s*
                                                   \$([a-z0-9]+) \s*, \s*
                                                   \$([a-z0-9]+)/x;
        if (!defined $rt) {
            warn "WARNING: malformed operands for ``${instr_name}'' "
                ."at line $line_number\n";
            return $nop_word;
        }
        if (my @bad = grep { !exists $reg{$_} } $rd, $rs, $rt) {
            warn "WARNING: unknown register ``\$$bad[0]'' "
                ."at line $line_number\n";
            return $nop_word;
        }
        my $hex = $op{$instr_name} << 26 |
                  $reg{$rs}        << 21 |
                  $reg{$rt}        << 16 |
                  $reg{$rd}        << 11 |
                  $fn{$instr_name};
        return sprintf "%08x\n", $hex;
    } elsif (exists $shift{$instr_name}) {
        # INSTR $rd, $rt, sa
        my ($rd, $rt, $sa) = $i =~ /\A \s* \S+ \s+ \$([a-z0-9]+) \s*, \s*
                                                   \$([a-z0-9]+) \s*, \s*
                                                   ([0-9]+)/x;
        if (!defined $sa) {
            warn "WARNING: malformed operands for ``${instr_name}'' "
                ."at line $line_number\n";
            return $nop_word;
        }
        if (my @bad = grep { !exists $reg{$_} } $rd, $rt) {
            warn "WARNING: unknown register ``\$$bad[0]'' "
                ."at line $line_number\n";
            return $nop_word;
        }
        # the sa field is 5 bits wide; a larger value would spill into rd
        if ($sa > 31) {
            warn "WARNING: shift amount $sa out of range (0..31) "
                ."at line $line_number\n";
            return $nop_word;
        }
        my $hex = $op{$instr_name} << 26 |
                  $reg{$rt}        << 16 |
                  $reg{$rd}        << 11 |
                  $sa              <<  6 |
                  $fn{$instr_name};
        return sprintf "%08x\n", $hex;
    } elsif (exists $nop{$instr_name}) {
        return $nop_word;
    } else {
        # only reachable if a mnemonic is listed in %r_instr but in none of
        # the layout tables checked above
        warn "WARNING: the impossible happend "
            ."for instruction ``${instr_name}'' at line $line_number in assemble_r()\n";
        return $nop_word; # nop
    }
}
# given an I-format instruction as a string, returns its hex representation.
#   $i          - the instruction text, starting with the mnemonic
#   $instr_name - the mnemonic; selects the operand layout (%loadstore or
#                 %binary_branch) and is the key into %op
# returns the 32-bit word as 8 hex digits followed by a newline.
# operands that do not parse, register names missing from %reg, and branch
# targets missing from %lables are reported with a warning and encoded as
# zeros (nop), instead of being assembled from stale or undefined values.
sub assemble_i {
    # I-format: | op (6) | rs (5) | rt (5) | immediate (16) |
    #           31     26 25    21 20    16 15             0
    my ($i, $instr_name) = @_;
    my $nop_word = sprintf "%08x\n", 0;

    if (exists $loadstore{$instr_name}) {
        # INSTR $rt, offset($base)
        # anchored at the start so the mnemonic is always the first word
        my ($rt, $offset, $base) =
            $i =~ /\A \s* \S+ \s+ \$([a-z0-9]+) \s*, \s*
                   (-?[0-9]+) \s*
                   \( \s* \$([a-z0-9]+) \s* \)/x;
        if (!defined $base) {
            warn "WARNING: malformed operands for ``${instr_name}'' "
                ."at line $line_number\n";
            return $nop_word;
        }
        if (my @bad = grep { !exists $reg{$_} } $rt, $base) {
            warn "WARNING: unknown register ``\$$bad[0]'' "
                ."at line $line_number\n";
            return $nop_word;
        }
        # the immediate field holds 16 bits; the value is still masked and
        # emitted as before, but no longer truncated silently
        if ($offset < -32768 || $offset > 65535) {
            warn "WARNING: offset $offset does not fit in 16 bits "
                ."at line $line_number\n";
        }
        my $hex = $op{$instr_name} << 26 |
                  $reg{$base}      << 21 |
                  $reg{$rt}        << 16 |
                  $offset & 0xffff;       # & binds tighter than |
        return sprintf "%08x\n", $hex;
    } elsif (exists $binary_branch{$instr_name}) {
        # INSTR $rs, $rt, LABEL
        my ($rs, $rt, $label) =
            $i =~ /\A \s* \S+ \s+ \$([a-z0-9]+) \s*, \s*
                   \$([a-z0-9]+) \s*, \s*
                   ([a-zA-Z_][a-zA-Z_0-9]*)/x;
        if (!defined $label) {
            warn "WARNING: malformed operands for ``${instr_name}'' "
                ."at line $line_number\n";
            return $nop_word;
        }
        if (my @bad = grep { !exists $reg{$_} } $rs, $rt) {
            warn "WARNING: unknown register ``\$$bad[0]'' "
                ."at line $line_number\n";
            return $nop_word;
        }
        if (!exists $lables{$label}) {
            warn "WARNING: undefined label ``${label}'' "
                ."at line $line_number\n";
            return $nop_word;
        }
        # displacement relative to the line after the branch, masked to the
        # 16-bit field (a negative value wraps to its two's complement).
        # NOTE(review): both operands are source line numbers, so blank or
        # comment lines between the branch and its target change the
        # result -- confirm whether that is intended.
        my $displacement = $lables{$label} - $line_number - 1;
        my $hex = $op{$instr_name} << 26 |
                  $reg{$rs}        << 21 |
                  $reg{$rt}        << 16 |
                  $displacement & 0xffff; # & binds tighter than |
        return sprintf "%08x\n", $hex;
    } else {
        # only reachable if a mnemonic is listed in %i_instr but in none of
        # the layout tables checked above
        warn "WARNING: the impossible happend "
            ."for instruction ``${instr_name}'' at line $line_number in assemble_i()\n";
        return $nop_word; # nop
    }
}