-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathassembler.y
352 lines (331 loc) · 9.88 KB
/
assembler.y
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
/*
* This file contains the Bison declarations, which handle the parsing and grammar
* portion of the assembler. Built using the Flex & Bison guide found here
* https://aquamentus.com/flex_bison.html#20
* Authors: [email protected], Tyler Linquata
*/
%{
#include <cstdio>
#include <unordered_map>
#include <vector>
#include <iostream>
#include <string.h>
#include <fstream>
#include "instruction.h"
using namespace std;
// Symbols defined outside this file (scanner / generated parser) that the
// code below uses:
extern int yylex();
extern int yyparse();
// input stream for the scanner; main() points it at the source file
extern FILE *yyin;
// current input line number, printed by yyerror()
// (presumably maintained by the scanner -- it is only read in this file)
extern int lineNum;
// label -> address, where the address is instruction_list_index + 1 at the
// moment the label is defined (see process_label)
unordered_map<string, int> jump_table;
// forward references: label -> value of instruction_list_index recorded
// right after the placeholder instruction was added (see label_instruction).
// NOTE(review): both maps are keyed by label, so a second forward reference
// to the same label overwrites the first entry and only the last one gets
// patched by repair_labels() -- confirm whether that is intended.
unordered_map<string, int> unfilled_jumps;
unordered_map<string, int> unfilled_branches;
// encoded instructions in emission order; written out by
// write_instructions_to_file()
vector<string> instruction_list;
// file that machine code is written to (opened in main)
ofstream writefile;
// number of instructions added so far; incremented by add_instruction()
int instruction_list_index;
// helper functions defined after the grammar
void write_instructions_to_file();
void repair_labels();
void label_instruction(string instruction, string label);
void add_instruction(Instruction i);
void process_label(string label);
// error callback; prints the message and exits
void yyerror(const char *s);
%}
// Bison fundamentally works by asking Flex for the next token, which it
// returns as an object of type YYSTYPE. By default, YYSTYPE is merely a
// typedef of "int", but for non-trivial projects tokens could be of any
// arbitrary data type. To deal with that, the idea is to override YYSTYPE's
// default typedef to be a C union instead. Unions can hold all of the types
// of tokens that Flex could return, which means we can return ints, floats,
// or strings cleanly. Bison implements this mechanism with the %union
// directive:
// Semantic value type shared by the scanner and the parser. Each field is
// bound to token types by the %token <field> declarations in this file.
// The grammar actions free() the char* values of INSTR, REG, IMM, REL and
// LABEL after use, so the scanner is presumably expected to heap-allocate
// them (e.g. with strdup) -- TODO confirm against the lexer.
%union {
// value of INT tokens
int ival;
// value of FLOAT tokens (the header's version number)
float fval;
// text of INSTR tokens (the mnemonic)
char *sval;
// text of REG tokens
char *regval;
// text of IMM tokens
char *immval;
// text of REL tokens
char *relval;
// text of LABEL tokens
char *label;
// text of COMMENT tokens
char *comment;
}
// Tokens that carry no semantic value.
// NOTE(review): TYPE (and INT, declared further down) are not referenced by
// any rule in this grammar.
%token CODEDUMPSTER TYPE
%token END ENDL
// Terminal symbols that carry a value (in CAPS by convention), each
// associated with a field of the %union:
%token <ival> INT
%token <fval> FLOAT
%token <sval> INSTR
%token <regval> REG
%token <immval> IMM;
%token <relval> REL;
%token <label> LABEL;
%token <comment> COMMENT;
%%
// the first rule defined is the highest-level rule, which in our
// case is just the concept of a whole assembly file:
// Start symbol: a complete source file is a header, the body, and a footer,
// in that order.
assembler:
header body_section footer {
// runs last, after the footer action has already patched labels and
// written the output
cout << "done with an asm file!" << endl;
}
;
// File header: the CODEDUMPSTER token, a FLOAT version number, and at least
// one ENDL. The version is only echoed; it is not validated.
header:
CODEDUMPSTER FLOAT ENDLS {
cout << "reading a codedumpster file version " << $2 << endl;
}
;
// The body is the sequence of lines between the header and the footer.
body_section:
    assembly_lines
    ;

// One or more body lines.
assembly_lines:
    assembly_line
    | assembly_lines assembly_line
    ;

// A single body line: any of the instruction forms, a label definition,
// or a comment. New line types only need to be listed here.
assembly_line:
    reg_type_line
    | imm_type_line
    | single_reg_line
    | single_rel_line
    | rel_reg_line
    | reg_rel_line
    | rel_imm_line
    | imm_rel_line
    | branch_line
    | op_label_line
    | jump_label
    | comment
    ;
// `INSTR IMM`: an instruction whose only operand is an immediate.
// Logs the operands and appends the instruction via add_instruction().
branch_line:
INSTR IMM ENDLS {
cout << "op: " << $1 << " imm: " << $2 << endl;
add_instruction(Instruction($1, $2));
// token strings are released once the instruction has been built
// (assumes the scanner heap-allocated them -- TODO confirm)
free($1);
free($2);
}
;
// `INSTR REG`: an instruction with a single register operand.
single_reg_line:
INSTR REG ENDLS {
// the trace prints the "Rdst:" label with no value; the one register is
// shown after "Rsrc:"
cout << "op: " << $1 << " Rdst: " << " Rsrc: " << $2 << endl;
add_instruction(Instruction($1, $2));
// token strings are released once the instruction has been built
// (assumes the scanner heap-allocated them -- TODO confirm)
free($1);
free($2);
}
;
// `INSTR REG REG`: a two-register instruction.
reg_type_line:
INSTR REG REG ENDLS {
// the trace labels the second register ($3) as Rdst and the first ($2) as
// Rsrc; the operands are handed to Instruction in source order
cout << "op: " << $1 << " Rdst: " << $3 << " Rsrc: " << $2 << endl;
add_instruction(Instruction($1, $2, $3));
// token strings are released once the instruction has been built
// (assumes the scanner heap-allocated them -- TODO confirm)
free($1);
free($2);
free($3);
}
;
// `INSTR IMM REG`: an immediate followed by a register (logged as Rdst).
imm_type_line:
INSTR IMM REG ENDLS {
cout << "op: " << $1 << " Rdst: " << $3 << " Imm: " << $2 << endl;
add_instruction(Instruction($1, $2, $3));
// token strings are released once the instruction has been built
// (assumes the scanner heap-allocated them -- TODO confirm)
free($1);
free($2);
free($3);
}
;
// `INSTR REL`: an instruction whose only operand is a REL token
// (logged as "relative").
single_rel_line:
INSTR REL ENDLS {
cout << "op: " << $1 << " relative: " << $2 << endl;
add_instruction(Instruction($1, $2));
// token strings are released once the instruction has been built
// (assumes the scanner heap-allocated them -- TODO confirm)
free($1);
free($2);
}
;
// `INSTR REL REG`: a REL operand followed by a register.
rel_reg_line:
INSTR REL REG ENDLS {
cout << "R-type op: " << $1 << " rel: " << $2 << " reg_1: " << $3 << endl;
add_instruction(Instruction($1, $2, $3));
// token strings are released once the instruction has been built
// (assumes the scanner heap-allocated them -- TODO confirm)
free($1);
free($2);
free($3);
}
;
// `INSTR REG REL`: a register followed by a REL operand.
reg_rel_line:
INSTR REG REL ENDLS {
cout << "R-type op: " << $1 << " reg: " << $2 << " rel: " << $3 << endl;
add_instruction(Instruction($1, $2, $3));
// token strings are released once the instruction has been built
// (assumes the scanner heap-allocated them -- TODO confirm)
free($1);
free($2);
free($3);
}
;
// `INSTR REL IMM`: a REL operand followed by an immediate.
rel_imm_line:
INSTR REL IMM ENDLS {
cout << "I-type op: " << $1 << " rel: " << $2 << " imm: " << $3 << endl;
add_instruction(Instruction($1, $2, $3));
// token strings are released once the instruction has been built
// (assumes the scanner heap-allocated them -- TODO confirm)
free($1);
free($2);
free($3);
}
;
// `INSTR IMM REL`: an immediate followed by a REL operand.
imm_rel_line:
INSTR IMM REL ENDLS {
cout << "I-type op: " << $1 << " imm: " << $2 << " rel: " << $3 << endl;
add_instruction(Instruction($1, $2, $3));
// token strings are released once the instruction has been built
// (assumes the scanner heap-allocated them -- TODO confirm)
free($1);
free($2);
free($3);
}
;
// A label definition on its own line. process_label() records its address
// in jump_table (and reports duplicates); no instruction is emitted.
jump_label:
LABEL ENDLS {
cout << "this is a label: " << $1 << endl;
process_label($1);
// process_label takes its argument by value (std::string), so the token
// text can be released here
free($1);
}
;
// `INSTR LABEL`: an instruction whose operand is a label. All of the work,
// including tracing and forward-reference bookkeeping, is delegated to
// label_instruction().
op_label_line:
INSTR LABEL ENDLS {
label_instruction($1, $2);
// label_instruction takes both arguments by value (std::string), so the
// token text can be released here
free($1);
free($2);
}
;
// A comment line; it has no action and emits nothing.
// NOTE(review): COMMENT carries a char* value (<comment>) that is not freed
// here, unlike the other string tokens -- this leaks if the scanner
// allocates it; confirm against the lexer.
comment: COMMENT ENDLS;
// End of file: END followed by at least one ENDL. The whole body has been
// parsed by the time this action runs, so forward references are patched
// first and then the finished instruction list is written out.
footer:
END ENDLS {
repair_labels();
write_instructions_to_file();
}
;
// One or more consecutive ENDL tokens, so every statement may be followed
// by any number of extra ENDLs (presumably blank lines -- ENDL is defined
// by the scanner).
ENDLS:
ENDLS ENDL
| ENDL ;
%%
// Emits every entry of instruction_list to writefile, one per line, in the
// order the instructions were added.
void write_instructions_to_file() {
    for (const auto& encoded : instruction_list) {
        writefile << encoded << endl;
    }
}
// fixed instructions that were written with undeclared labels
void repair_labels() {
int replace_line;
string replacement_string;
// iterate over each instruction that needs to be fixed
for(auto const& it: unfilled_jumps) {
replace_line = it.second - 1;
// replace empty moves with correct label address
Instruction i = Instruction("MOVI", "$" + to_string(jump_table[it.first]), "R15");
instruction_list[replace_line] = i.instruction;
}
for(auto const& it: unfilled_branches) {
replace_line = it.second - 1;
// get binary string for displacement by trimming off first 8 characters
Instruction i = Instruction("BNE", "$" + to_string(jump_table[it.first] - replace_line));
// trim off first 8 values as we only care about last 8
replacement_string = i.instruction.substr(8, i.instruction.size());
instruction_list[replace_line] = instruction_list[replace_line].replace(8, 8,
replacement_string);
}
}
// Handles an instruction whose operand is a label.
//
// Mnemonics starting with 'J' expand to two instructions: a MOVI that loads
// the label's address into R15, then the jump itself through R15.
// Mnemonics starting with 'B' become one instruction with an immediate
// displacement. Any other mnemonic emits nothing.
//
// When the label is not in jump_table yet, a zero placeholder is emitted and
// the current instruction_list_index is recorded in unfilled_jumps /
// unfilled_branches so repair_labels() can patch it later.
void label_instruction(string op, string label) {
    const auto known = jump_table.find(label);
    const bool resolved = (known != jump_table.end());

    switch (op[0]) {
    case 'J':
        if (resolved) {
            cout << "op: MOVI, imm: " + to_string(known->second) + " reg: R15" << endl;
            // load the label's address into R15
            add_instruction(Instruction("MOVI", "$" + to_string(known->second), "R15"));
        } else {
            // address unknown so far: emit a zero placeholder
            cout << "op: MOVI, imm: $" + to_string(0) + " reg: R15" << endl;
            add_instruction(Instruction("MOVI", "$0", "R15"));
            // remember where the placeholder is so it can be filled at EOF
            unfilled_jumps[label] = instruction_list_index;
        }
        cout << "op: " << op << " reg: R15" << endl;
        // the jump itself goes through R15
        add_instruction(Instruction(op, "R15"));
        break;

    case 'B':
        if (resolved) {
            // NOTE(review): repair_labels() computes forward displacements
            // without the extra "- 1" used here; the two conventions differ
            // by one -- confirm which the target expects.
            const int displacement = known->second - instruction_list_index - 1;
            cout << "op: B, imm: $" << displacement << endl;
            add_instruction(Instruction(op, "$" + to_string(displacement)));
            cout << label << endl;
        } else {
            // displacement unknown so far: emit a zero placeholder
            add_instruction(Instruction(op, "$0"));
            // remember which instruction has to be patched later
            unfilled_branches[label] = instruction_list_index;
        }
        break;

    default:
        break;
    }
}
// Registers a newly defined label in jump_table with the address
// instruction_list_index + 1. If the label is already present the table is
// left untouched and the duplicate is reported through yyerror().
void process_label(string label) {
    // emplace() only inserts when the key is absent; .second says whether it did
    const bool inserted = jump_table.emplace(label, instruction_list_index + 1).second;
    if (!inserted) {
        yyerror("Duplicate label.");
    }
}
// Appends the instruction's encoded form to instruction_list and advances
// instruction_list_index so it keeps counting the instructions added.
void add_instruction(Instruction i) {
    const auto& encoded = i.instruction;
    instruction_list.push_back(encoded);
    ++instruction_list_index;
}
// Entry point. Usage: <program> <source file> <output file>
//
// Opens the source file for the scanner and the output file for the machine
// code, then runs the parser. Returns the parser's status (0 on success) and
// -1 for any usage or file-open error.
int main(int argc, char *argv[]) {
    instruction_list_index = 0;

    // Exactly two arguments are required; every usage error is a failure.
    if (argc == 1) {
        cout << "Missing command line arguments." << endl;
        return -1;
    }
    if (argc == 2) {
        cout << "Missing write file" << endl;
        return -1;
    }
    if (argc != 3) {
        cout << "Too many arguments" << endl;
        return -1;
    }

    // Open the input first, so a bad source path does not create or
    // truncate the output file.
    FILE *readfile = fopen(argv[1], "r");
    if (!readfile) {
        cout << "I can't open that read file!" << endl;
        return -1;
    }

    writefile.open(argv[2], ios::trunc);
    if (!writefile) {
        cout << "I can't open that write file!" << endl;
        fclose(readfile);
        return -1;
    }

    // Set Flex to read from the source file instead of defaulting to STDIN:
    yyin = readfile;
    // Parse through the input; the footer rule writes the output file.
    const int status = yyparse();

    fclose(readfile);
    writefile.close();
    return status;
}
void yyerror(const char *s) {
cout << "EEK, parse error on line " << lineNum << "! Message: " << s << endl;
// might as well halt now:
exit(-1);
}