1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
|
; program entry point: a hand-assembled stack fix-up followed by a jump to main.
; add 24 + 16 = 40 to the stack pointer to put argc, argv in the right place
; the seven bytes below encode `add rsp, 40` (48 81 c4 followed by a 32-bit immediate)
byte 0x48
byte 0x81
byte 0xc4
byte 40
byte 0
byte 0
byte 0
goto main
; file descriptor of the output file; set in main and passed to preprocess
global output_fd
; start of the buffer that holds every #define entry (allocated in main)
global defines
; position in that buffer where preprocess stores the next entry
global defines_end
; program entry: pick the input and output file names, then preprocess the
; input into the output.
; when fewer than two file names are given (argc < 3) the names fall back to
; in04a and out04a.
function main
; presumably this order matches how the entry stub leaves argc/argv on the stack
argument argv2
argument argv1
argument argv0
argument argc
local src_path
local dest_path
; room for every #define seen during the run
defines = malloc(4000000)
defines_end = defines
; start from the built-in names and let the command line override them
src_path = .str_default_input_filename
dest_path = .str_default_output_filename
if argc < 3 goto main_paths_chosen
src_path = argv1
dest_path = argv2
:main_paths_chosen
; open(dest_path, O_WRONLY|O_CREAT|O_TRUNC, 0644); 420 = octal 644
output_fd = syscall(2, dest_path, 0x241, 420)
if output_fd < 0 goto main_output_unopenable
preprocess(src_path, output_fd)
close(output_fd)
free(defines)
exit(0)
:main_output_unopenable
; file_error prints a message and exits with status 1
file_error(dest_path)
:str_default_input_filename
string in04a
byte 0
:str_default_output_filename
string out04a
byte 0
; preprocess the file named input_filename and write the result to output_fd.
;  - "#define NAME replacement" lines are stored in the definitions table and
;    produce an empty output line
;  - "#include file" lines are replaced by the preprocessed contents of file
;  - on every other line, defined names are swapped for their replacements
; "#line" directives are written so the output records which file and line
; each part came from.
; exits with status 1 if the file cannot be opened, or on a malformed or
; repeated #define.
function preprocess
argument input_filename
argument output_fd
local input_fd
; a single line buffer shared by all (recursive) calls of preprocess
global 2048 line_buf
local line
local b
local p
local c
local line_number
local filename
local filename_size
line_number = 0
line = &line_buf
; when called for an #include, input_filename points into line_buf, which is
; overwritten as soon as the first line is read. keep a private copy so that
; the #line directive written after a nested #include still names this file.
filename_size = strlen(input_filename)
filename_size += 1
filename = malloc(filename_size)
strcpy(filename, input_filename)
; first, open the input file
input_fd = syscall(2, filename, 0)
if input_fd >= 0 goto input_file_good
file_error(filename)
:input_file_good
; output a line directive
fputs(output_fd, .str_line1)
fputs(output_fd, filename)
fputc(output_fd, 10)
:preprocess_loop
line_number += 1
b = fgets(input_fd, line, 2000)
if b == 0 goto preprocess_eof
b = str_startswith(line, .str_define)
if b != 0 goto handle_define
b = str_startswith(line, .str_include)
if b != 0 goto handle_include
; normal line (not #define or #include)
p = line
:normal_line_loop
c = *1p
if c == 0 goto normal_line_loop_end
; optimization: don't look this up if it doesn't start with an uppercase letter
b = isupper(c)
if b == 0 goto no_replacement
b = look_up_define(p)
if b == 0 goto no_replacement
; wow! a replacement!
fputs(output_fd, b)
; advance p past this identifier
:advance_loop
c = *1p
b = is_ident(c)
if b == 0 goto normal_line_loop
p += 1
goto advance_loop
:no_replacement
fputc(output_fd, c)
p += 1
goto normal_line_loop
:normal_line_loop_end
fputc(output_fd, 10)
goto preprocess_loop
:handle_define
local def
def = line + 8 ; 8 = length of "#define "
; make sure define name only consists of identifier characters
; (and starts with an uppercase letter)
p = def
c = *1p
b = isupper(c)
if b == 0 goto bad_define
:define_check_loop
c = *1p
; the name must be followed by a space; anything else that is not an
; identifier character (including the end of the line) is an error
if c == 32 goto define_check_loop_end
b = is_ident(c)
if b == 0 goto bad_define
p += 1
goto define_check_loop
:define_check_loop_end
b = look_up_define(def)
if b != 0 goto redefinition
; store "NAME replacement" with its terminating 0, and step past the 0
defines_end = strcpy(defines_end, def)
defines_end += 1
fputc(output_fd, 10) ; don't screw up line numbers
goto preprocess_loop
:bad_define
fputs(2, .str_bad_define)
fputs(2, line)
fputc(2, 10)
exit(1)
:redefinition
fputs(2, .str_redefinition)
fputs(2, line)
fputc(2, 10)
exit(1)
:handle_include
local included_filename
local n
included_filename = line + 9 ; 9 = length of "#include "
preprocess(included_filename, output_fd)
; reset filename and line number
fputs(output_fd, .str_line)
n = line_number + 1
fputn(output_fd, n)
fputc(output_fd, 32)
fputs(output_fd, filename)
fputc(output_fd, 10)
goto preprocess_loop
:preprocess_eof
close(input_fd)
free(filename)
return
:str_redefinition
string Preprocessor redefinition:
byte 32
byte 0
:str_bad_define
string Bad preprocessor definition:
byte 32
byte 0
:str_define
string #define
byte 32
byte 0
:str_include
string #include
byte 32
byte 0
:str_line
string #line
byte 32
byte 0
:str_line1
string #line
byte 32
string 1
byte 32
byte 0
; search the definitions table for the identifier at the start of str.
; returns the address of its replacement text (the byte after the space that
; follows the name), or 0 when str names no definition.
function look_up_define
argument str
local entry
local lead
local same
local sep
entry = defines
:lud_next_entry
; an entry that starts with a 0 byte marks the end of the table
lead = *1entry
if lead != 0 goto lud_compare
return 0
:lud_compare
same = ident_eq(str, entry)
if same != 1 goto lud_skip_entry
; found: the replacement starts right after the first space
sep = memchr(entry, 32)
sep += 1
return sep
:lud_skip_entry
; step over this entry and its terminating 0
entry = memchr(entry, 0)
entry += 1
goto lud_next_entry
; compare the identifiers that begin at s1 and s2.
; returns 1 when both consist of the same run of identifier characters
; (as judged by is_ident), 0 otherwise.
function ident_eq
argument s1
argument s2
local p
local q
local cp
local cq
local is_p
local is_q
p = s1
q = s2
:ident_eq_step
cp = *1p
cq = *1q
is_p = is_ident(cp)
is_q = is_ident(cq)
; one identifier ended before the other did
if is_p != is_q goto ident_eq_differ
; both ended together, and every character so far matched
if is_p == 0 goto ident_eq_same
if cp != cq goto ident_eq_differ
p += 1
q += 1
goto ident_eq_step
:ident_eq_differ
return 0
:ident_eq_same
return 1
; returns 1 if c may appear in a preprocessor identifier
; (digits, upper-case letters, underscore); 0 otherwise.
function is_ident
argument c
if c == '_ goto is_ident_yes
; work downwards through the ranges: everything else above Z is rejected
if c > 'Z goto is_ident_no
if c >= 'A goto is_ident_yes
if c > '9 goto is_ident_no
if c >= '0 goto is_ident_yes
:is_ident_no
return 0
:is_ident_yes
return 1
; report that the file called name could not be opened: writes
; "Error opening file: <name>" and a newline to file descriptor 2,
; then exits with status 1.
function file_error
argument name
local msg
msg = .str_file_error
fputs(2, msg)
fputs(2, name)
; 10 = newline
fputc(2, 10)
exit(1)
:str_file_error
string Error opening file:
byte 32
byte 0
; allocate size bytes and return a pointer to them.
; the memory comes from system call 9 (mmap); the total mapping length is
; stored in the 8 bytes just before the returned pointer so that free can
; release it. on failure prints "Out of memory." and exits with status 1.
function malloc
argument size
local map_len
local base
; 8 extra bytes for the length header
map_len = 8
map_len += size
; mmap(0, map_len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)
base = syscall(9, 0, map_len, 3, 0x22, -1, 0)
; unsigned comparison: error results are small negative numbers, i.e. huge
; when viewed as unsigned
if base ] 0xffffffffffff0000 goto malloc_failed
*8base = map_len
base += 8
return base
:malloc_failed
fputs(2, .str_out_of_memory)
exit(1)
:str_out_of_memory
string Out of memory.
byte 10
byte 0
; release a block previously returned by malloc.
; malloc keeps the mapping length in the 8 bytes before the block; read it
; back and hand the whole mapping to system call 11 (munmap).
function free
argument memory
local base
local map_len
base = memory
base -= 8
map_len = *8base
; munmap(base, map_len)
syscall(11, base, map_len)
return
; convert x to decimal text and return a pointer to its first digit.
; the text lives in the shared 32-byte buffer itos_string, so it is only valid
; until the next call. digits are produced from least to most significant and
; stored backwards, ending at index 30; index 31 is never written and serves
; as the terminator (assumes the global starts out zeroed - TODO confirm).
function itos
global 32 itos_string
argument x
local digit
local out
out = &itos_string
out += 31
:itos_next_digit
out -= 1
digit = x % 10
digit += '0
*1out = digit
x /= 10
if x != 0 goto itos_next_digit
return out
; parse the decimal number at the start of s.
; reading stops at the first character that is not a digit; the result is 0
; when s does not begin with a digit.
function stoi
argument s
local cursor
local total
local digit
total = 0
cursor = s
:stoi_next
digit = *1cursor
if digit > '9 goto stoi_done
; turn the character into its value; anything below '0 becomes negative
digit -= '0
if digit < 0 goto stoi_done
total *= 10
total += digit
cursor += 1
goto stoi_next
:stoi_done
return total
; return the address of the first byte equal to c at or after mem.
; unlike C's memchr there is no length argument: the scan only stops at a
; match, so the caller must know that c occurs.
function memchr
argument mem
argument c
local cur
cur = mem
goto memchr_test
:memchr_advance
cur += 1
:memchr_test
if *1cur != c goto memchr_advance
return cur
; returns the number of bytes in s before its terminating 0 byte.
function strlen
argument s
local tail
tail = s
goto strlen_test
:strlen_advance
tail += 1
:strlen_test
if *1tail != 0 goto strlen_advance
; tail now points at the terminator
return tail - s
; copy the 0-terminated string src, terminator included, to dest.
; note: unlike C's strcpy, the return value is the address of the terminating
; 0 written into dest (convenient for appending).
function strcpy
argument dest
argument src
local to
local from
local ch
to = dest
from = src
goto strcpy_copy
:strcpy_advance
to += 1
from += 1
:strcpy_copy
ch = *1from
*1to = ch
if ch != 0 goto strcpy_advance
return to
; returns 1 if the string s begins with the string prefix, 0 otherwise.
; an empty prefix matches every string.
function str_startswith
argument s
argument prefix
local scan
local pat
local sc
local pc
scan = s
pat = prefix
:str_startswith_next
sc = *1scan
pc = *1pat
; reached the end of prefix without a mismatch
if pc == 0 goto str_startswith_yes
if sc != pc goto str_startswith_no
scan += 1
pat += 1
goto str_startswith_next
:str_startswith_no
return 0
:str_startswith_yes
return 1
; write the 0-terminated string s (without its terminator) to the file
; descriptor fd. no newline is added.
function fputs
argument fd
argument s
local nbytes
nbytes = strlen(s)
; system call 1 = write(fd, s, nbytes)
syscall(1, fd, s, nbytes)
return
; write the 0-terminated string s to file descriptor 1 (standard output).
; unlike C's puts, no newline is appended.
function puts
argument s
fputs(1, s)
return
; write the decimal representation of the number n to file descriptor fd.
function fputn
argument fd
argument n
local digits
; itos returns a pointer into its own shared buffer
digits = itos(n)
fputs(fd, digits)
return
; write the single byte c to file descriptor fd.
function fputc
argument fd
argument c
; system call 1 = write(fd, &c, 1): one byte is taken from the address of c,
; which is its least significant byte on little-endian x86-64
syscall(1, fd, &c, 1)
return
; write the single byte c to file descriptor 1 (standard output).
function putc
argument c
fputc(1, c)
return
; read one byte from file descriptor fd and return it.
; returns 0 at end of file (a 0 byte in the input looks the same to the caller).
function fgetc
argument fd
local ch
; start from 0 so that a read which delivers nothing leaves 0 behind
ch = 0
; system call 0 = read(fd, &ch, 1)
syscall(0, fd, &ch, 1)
return ch
; read a line from fd into buf as a null-terminated string (the newline is
; not stored). at most size - 1 characters are stored; a longer line is
; continued by the next call. size should be at least 2.
; returns 1 if a line was read, 0 at end of file.
; a final line that is not terminated by a newline is still returned as a
; line (with result 1); the call after that returns 0.
function fgets
argument fd
argument buf
argument size
local p
local end
local c
p = buf
; end is the last slot of buf, reserved for the terminator
end = buf + size
end -= 1
:fgets_loop
; stop before reading when only the terminator's slot is left, so that
; no input character is consumed and then thrown away
if p >= end goto fgets_eob
c = fgetc(fd)
if c == 0 goto fgets_eof
if c == 10 goto fgets_eol
*1p = c
p += 1
goto fgets_loop
:fgets_eol ; end of line
*1p = 0
return 1
:fgets_eof ; end of file
*1p = 0
; nothing read at all: genuine end of file
if p == buf goto fgets_nothing_read
; otherwise hand back the unterminated last line
return 1
:fgets_nothing_read
return 0
:fgets_eob ; end of buffer
*1p = 0
return 1
; close the file descriptor fd.
function close
argument fd
; system call 3 = close
syscall(3, fd)
return
; returns 1 if c is an ASCII upper-case letter (A-Z), 0 otherwise.
function isupper
argument c
if c > 'Z goto isupper_no
if c >= 'A goto isupper_yes
:isupper_no
return 0
:isupper_yes
return 1
; terminate the process with the given status code.
function exit
argument status_code
; system call 0x3c (60) = exit
syscall(0x3c, status_code)
; shared return stubs: other functions in this file (is_ident, ident_eq,
; isupper, str_startswith) `goto` these labels instead of carrying their own
; `return 0` / `return 1`, so the labels must keep these names
:return_0
return 0
:return_1
return 1
; perform a raw system call, written directly in x86-64 machine code.
; the first argument (at rbp-8) is the system call number; up to six further
; arguments (rbp-16 .. rbp-56) are loaded into rdi, rsi, rdx, r10, r8 and r9.
; the result is left in rax, which callers in this file use as the return value.
; all six argument slots are always read, even when fewer were passed.
function syscall
; I've done some testing, and this should be okay even if
; rbp-56 goes beyond the end of the stack.
; mov rax, [rbp-16]
byte 0x48
byte 0x8b
byte 0x85
byte 0xf0
byte 0xff
byte 0xff
byte 0xff
; mov rdi, rax
byte 0x48
byte 0x89
byte 0xc7
; mov rax, [rbp-24]
byte 0x48
byte 0x8b
byte 0x85
byte 0xe8
byte 0xff
byte 0xff
byte 0xff
; mov rsi, rax
byte 0x48
byte 0x89
byte 0xc6
; mov rax, [rbp-32]
byte 0x48
byte 0x8b
byte 0x85
byte 0xe0
byte 0xff
byte 0xff
byte 0xff
; mov rdx, rax
byte 0x48
byte 0x89
byte 0xc2
; mov rax, [rbp-40]
byte 0x48
byte 0x8b
byte 0x85
byte 0xd8
byte 0xff
byte 0xff
byte 0xff
; mov r10, rax
byte 0x49
byte 0x89
byte 0xc2
; mov rax, [rbp-48]
byte 0x48
byte 0x8b
byte 0x85
byte 0xd0
byte 0xff
byte 0xff
byte 0xff
; mov r8, rax
byte 0x49
byte 0x89
byte 0xc0
; mov rax, [rbp-56]
byte 0x48
byte 0x8b
byte 0x85
byte 0xc8
byte 0xff
byte 0xff
byte 0xff
; mov r9, rax
byte 0x49
byte 0x89
byte 0xc1
; the system call number goes in rax last, because rax was used as the
; scratch register for every load above
; mov rax, [rbp-8]
byte 0x48
byte 0x8b
byte 0x85
byte 0xf8
byte 0xff
byte 0xff
byte 0xff
; syscall
byte 0x0f
byte 0x05
return
|