KCC - Kayte C Compiler 1.10.0
A C compiler implementation with preprocessor, lexer, parser, and code generator
Loading...
Searching...
No Matches
codegen.c
Go to the documentation of this file.
1
42
43#include "kcc.h"
44#include <stdio.h>
45#include <stdlib.h>
46#include <stdarg.h>
47
// Pick the code-generation target from the architecture this compiler is
// being built for. Exactly one of TARGET_ARM64 / TARGET_X86_64 ends up as 1
// and the other as 0; any other host architecture is a build error.
#if defined(__aarch64__)
#  define TARGET_ARM64 1
#  define TARGET_X86_64 0
#elif defined(__x86_64__)
#  define TARGET_ARM64 0
#  define TARGET_X86_64 1
#else
#  error "Unsupported architecture"
#endif
58
59CodeGenerator *codegen_create(const char *output_filename) {
60 CodeGenerator *codegen = malloc(sizeof(CodeGenerator));
61 if (!codegen) {
62 error_fatal("Memory allocation failed for code generator");
63 return NULL;
64 }
65
66 codegen->output_file = fopen(output_filename, "w");
67 if (!codegen->output_file) {
68 free(codegen);
69 error_fatal("Could not open output file '%s'", output_filename);
70 return NULL;
71 }
72
73 codegen->label_counter = 0;
74 codegen->temp_counter = 0;
75
76 return codegen;
77}
78
79void codegen_destroy(CodeGenerator *codegen) {
80 if (codegen) {
81 if (codegen->output_file) {
82 fclose(codegen->output_file);
83 }
84 free(codegen);
85 }
86}
87
88void codegen_emit(CodeGenerator *codegen, const char *format, ...) {
89 va_list args;
90 va_start(args, format);
91 vfprintf(codegen->output_file, format, args);
92 va_end(args);
93 fprintf(codegen->output_file, "\n");
94}
95
96char *codegen_new_label(CodeGenerator *codegen) {
97 char *label = malloc(32);
98 if (!label) {
99 error_fatal("Memory allocation failed for label");
100 return NULL;
101 }
102 snprintf(label, 32, "L%d", codegen->label_counter++);
103 return label;
104}
105
106char *codegen_new_temp(CodeGenerator *codegen) {
107 char *temp = malloc(32);
108 if (!temp) {
109 error_fatal("Memory allocation failed for temp");
110 return NULL;
111 }
112 snprintf(temp, 32, "t%d", codegen->temp_counter++);
113 return temp;
114}
115
116bool codegen_generate(CodeGenerator *codegen, ASTNode *ast) {
117 if (!codegen || !ast) {
118 return false;
119 }
120
121 // Generate assembly header
122#if TARGET_ARM64
123 codegen_emit(codegen, "// Generated by KCC (ARM64/Apple Silicon) v%s", KCC_VERSION);
124 codegen_emit(codegen, ".section __TEXT,__text,regular,pure_instructions");
125 codegen_emit(codegen, ".build_version macos, 11, 0");
126 codegen_emit(codegen, ".globl _main");
127 codegen_emit(codegen, ".p2align 2");
128#else
129 codegen_emit(codegen, "# Generated by KCC (x86-64) v%s", KCC_VERSION);
130 codegen_emit(codegen, ".section __TEXT,__text,regular,pure_instructions");
131 codegen_emit(codegen, ".globl _main");
132#endif
133 codegen_emit(codegen, "");
134
135 // Generate code for the program
136 codegen_program(codegen, ast);
137
138 // Generate main entry point
139 codegen_emit(codegen, "");
140 codegen_emit(codegen, "_main:");
141
142#if TARGET_ARM64
143 codegen_emit(codegen, " stp fp, lr, [sp, #-16]!");
144 codegen_emit(codegen, " mov fp, sp");
145 codegen_emit(codegen, " bl _main_func");
146 codegen_emit(codegen, " mov w0, #0");
147 codegen_emit(codegen, " ldp fp, lr, [sp], #16");
148 codegen_emit(codegen, " ret");
149#else
150 codegen_emit(codegen, " pushq %%rbp");
151 codegen_emit(codegen, " movq %%rsp, %%rbp");
152 codegen_emit(codegen, " callq _main_func");
153 codegen_emit(codegen, " movq $0x2000001, %%rax");
154 codegen_emit(codegen, " movq $0, %%rdi");
155 codegen_emit(codegen, " syscall");
156 codegen_emit(codegen, " popq %%rbp");
157 codegen_emit(codegen, " retq");
158#endif
159
160 return true;
161}
162
163void codegen_program(CodeGenerator *codegen, ASTNode *node) {
164 if (node->type != AST_PROGRAM) return;
165
166 for (int i = 0; i < node->data.program.declaration_count; i++) {
167 ASTNode *decl = node->data.program.declarations[i];
168
169 if (decl->type == AST_FUNCTION_DECLARATION) {
170 codegen_function_declaration(codegen, decl);
171 } else if (decl->type == AST_VARIABLE_DECLARATION) {
172 codegen_variable_declaration(codegen, decl);
173 }
174 }
175}
176
177void codegen_function_declaration(CodeGenerator *codegen, ASTNode *node) {
178 if (node->type != AST_FUNCTION_DECLARATION) return;
179
180 codegen_emit(codegen, "");
181#if TARGET_ARM64
182 codegen_emit(codegen, "// Function: %s", node->data.function_decl.name);
183#else
184 codegen_emit(codegen, "# Function: %s", node->data.function_decl.name);
185#endif
186
187 if (strcmp(node->data.function_decl.name, "main") == 0) {
188 codegen_emit(codegen, "_main_func:");
189 } else {
190 codegen_emit(codegen, "_%s:", node->data.function_decl.name);
191 }
192
193 // Function prologue
194#if TARGET_ARM64
195 codegen_emit(codegen, " stp fp, lr, [sp, #-16]!");
196 codegen_emit(codegen, " mov fp, sp");
197#else
198 codegen_emit(codegen, " pushq %%rbp");
199 codegen_emit(codegen, " movq %%rsp, %%rbp");
200#endif
201
202 if (node->data.function_decl.body) {
203 codegen_compound_statement(codegen, node->data.function_decl.body);
204 }
205
206 // Function epilogue - default return 0
207#if TARGET_ARM64
208 codegen_emit(codegen, " mov w0, #0");
209 codegen_emit(codegen, " ldp fp, lr, [sp], #16");
210 codegen_emit(codegen, " ret");
211#else
212 codegen_emit(codegen, " movq $0, %%rax");
213 codegen_emit(codegen, " movq %%rbp, %%rsp");
214 codegen_emit(codegen, " popq %%rbp");
215 codegen_emit(codegen, " retq");
216#endif
217}
218
219void codegen_variable_declaration(CodeGenerator *codegen, ASTNode *node) {
220 if (node->type != AST_VARIABLE_DECLARATION) return;
221
222#if TARGET_ARM64
223 codegen_emit(codegen, "// Variable: %s", node->data.var_decl.name);
224#else
225 codegen_emit(codegen, "# Variable: %s", node->data.var_decl.name);
226#endif
227
228 if (node->data.var_decl.initializer) {
229 codegen_expression(codegen, node->data.var_decl.initializer);
230#if TARGET_ARM64
231 codegen_emit(codegen, " // Store result in %s", node->data.var_decl.name);
232#else
233 codegen_emit(codegen, " # Store result in %s", node->data.var_decl.name);
234#endif
235 }
236}
237
238void codegen_statement(CodeGenerator *codegen, ASTNode *node) {
239 switch (node->type) {
240 case AST_COMPOUND_STATEMENT:
241 codegen_compound_statement(codegen, node);
242 break;
243 case AST_EXPRESSION_STATEMENT:
244 codegen_expression_statement(codegen, node);
245 break;
246 case AST_RETURN_STATEMENT:
247 codegen_return_statement(codegen, node);
248 break;
249 case AST_IF_STATEMENT:
250 codegen_if_statement(codegen, node);
251 break;
252 case AST_WHILE_STATEMENT:
253 codegen_while_statement(codegen, node);
254 break;
255 case AST_FOR_STATEMENT:
256 codegen_for_statement(codegen, node);
257 break;
258 case AST_VARIABLE_DECLARATION:
259 codegen_variable_declaration(codegen, node);
260 break;
261 default:
262#if TARGET_ARM64
263 codegen_emit(codegen, " // Unsupported statement type: %s",
264 ast_node_type_to_string(node->type));
265#else
266 codegen_emit(codegen, " # Unsupported statement type: %s",
267 ast_node_type_to_string(node->type));
268#endif
269 break;
270 }
271}
272
273void codegen_compound_statement(CodeGenerator *codegen, ASTNode *node) {
274 if (node->type != AST_COMPOUND_STATEMENT) return;
275
276 for (int i = 0; i < node->data.compound_stmt.statement_count; i++) {
277 codegen_statement(codegen, node->data.compound_stmt.statements[i]);
278 }
279}
280
281void codegen_expression_statement(CodeGenerator *codegen, ASTNode *node) {
282 if (node->type != AST_EXPRESSION_STATEMENT) return;
283
284 if (node->data.expression_stmt.expression) {
285 codegen_expression(codegen, node->data.expression_stmt.expression);
286 }
287}
288
289void codegen_return_statement(CodeGenerator *codegen, ASTNode *node) {
290 if (node->type != AST_RETURN_STATEMENT) return;
291
292 if (node->data.return_stmt.expression) {
293 codegen_expression(codegen, node->data.return_stmt.expression);
294#if TARGET_ARM64
295 codegen_emit(codegen, " // Result already in w0/x0");
296#else
297 codegen_emit(codegen, " # Result already in %%rax");
298#endif
299 } else {
300#if TARGET_ARM64
301 codegen_emit(codegen, " mov w0, #0");
302#else
303 codegen_emit(codegen, " movq $0, %%rax");
304#endif
305 }
306
307#if TARGET_ARM64
308 codegen_emit(codegen, " ldp fp, lr, [sp], #16");
309 codegen_emit(codegen, " ret");
310#else
311 codegen_emit(codegen, " movq %%rbp, %%rsp");
312 codegen_emit(codegen, " popq %%rbp");
313 codegen_emit(codegen, " retq");
314#endif
315}
316
317void codegen_if_statement(CodeGenerator *codegen, ASTNode *node) {
318 if (node->type != AST_IF_STATEMENT) return;
319
320 char *else_label = codegen_new_label(codegen);
321 char *end_label = codegen_new_label(codegen);
322
323 codegen_expression(codegen, node->data.if_stmt.condition);
324#if TARGET_ARM64
325 codegen_emit(codegen, " cmp w0, #0");
326 codegen_emit(codegen, " b.eq %s", else_label);
327#else
328 codegen_emit(codegen, " testq %%rax, %%rax");
329 codegen_emit(codegen, " jz %s", else_label);
330#endif
331
332 codegen_statement(codegen, node->data.if_stmt.then_stmt);
333#if TARGET_ARM64
334 codegen_emit(codegen, " b %s", end_label);
335#else
336 codegen_emit(codegen, " jmp %s", end_label);
337#endif
338
339 codegen_emit(codegen, "%s:", else_label);
340 if (node->data.if_stmt.else_stmt) {
341 codegen_statement(codegen, node->data.if_stmt.else_stmt);
342 }
343
344 codegen_emit(codegen, "%s:", end_label);
345
346 free(else_label);
347 free(end_label);
348}
349
350void codegen_while_statement(CodeGenerator *codegen, ASTNode *node) {
351 if (node->type != AST_WHILE_STATEMENT) return;
352
353 char *loop_label = codegen_new_label(codegen);
354 char *end_label = codegen_new_label(codegen);
355
356 codegen_emit(codegen, "%s:", loop_label);
357
358 codegen_expression(codegen, node->data.while_stmt.condition);
359#if TARGET_ARM64
360 codegen_emit(codegen, " cmp w0, #0");
361 codegen_emit(codegen, " b.eq %s", end_label);
362#else
363 codegen_emit(codegen, " testq %%rax, %%rax");
364 codegen_emit(codegen, " jz %s", end_label);
365#endif
366
367 codegen_statement(codegen, node->data.while_stmt.body);
368#if TARGET_ARM64
369 codegen_emit(codegen, " b %s", loop_label);
370#else
371 codegen_emit(codegen, " jmp %s", loop_label);
372#endif
373
374 codegen_emit(codegen, "%s:", end_label);
375
376 free(loop_label);
377 free(end_label);
378}
379
380void codegen_for_statement(CodeGenerator *codegen, ASTNode *node) {
381 if (node->type != AST_FOR_STATEMENT) return;
382
383 char *loop_label = codegen_new_label(codegen);
384 char *update_label = codegen_new_label(codegen);
385 char *end_label = codegen_new_label(codegen);
386
387 if (node->data.for_stmt.init) {
388 codegen_expression(codegen, node->data.for_stmt.init);
389 }
390
391 codegen_emit(codegen, "%s:", loop_label);
392
393 if (node->data.for_stmt.condition) {
394 codegen_expression(codegen, node->data.for_stmt.condition);
395#if TARGET_ARM64
396 codegen_emit(codegen, " cmp w0, #0");
397 codegen_emit(codegen, " b.eq %s", end_label);
398#else
399 codegen_emit(codegen, " testq %%rax, %%rax");
400 codegen_emit(codegen, " jz %s", end_label);
401#endif
402 }
403
404 codegen_statement(codegen, node->data.for_stmt.body);
405
406 codegen_emit(codegen, "%s:", update_label);
407 if (node->data.for_stmt.update) {
408 codegen_expression(codegen, node->data.for_stmt.update);
409 }
410#if TARGET_ARM64
411 codegen_emit(codegen, " b %s", loop_label);
412#else
413 codegen_emit(codegen, " jmp %s", loop_label);
414#endif
415
416 codegen_emit(codegen, "%s:", end_label);
417
418 free(loop_label);
419 free(update_label);
420 free(end_label);
421}
422
423void codegen_expression(CodeGenerator *codegen, ASTNode *node) {
424 switch (node->type) {
425 case AST_BINARY_OP:
426 codegen_binary_expression(codegen, node);
427 break;
428 case AST_UNARY_OP:
429 codegen_unary_expression(codegen, node);
430 break;
431 case AST_FUNCTION_CALL:
432 codegen_call_expression(codegen, node);
433 break;
434 case AST_IDENTIFIER:
435 codegen_identifier(codegen, node);
436 break;
437 case AST_NUMBER_LITERAL:
438 codegen_number(codegen, node);
439 break;
440 case AST_STRING_LITERAL:
441 codegen_string(codegen, node);
442 break;
443 case AST_ASSIGNMENT:
444 codegen_assignment(codegen, node);
445 break;
446 default:
447#if TARGET_ARM64
448 codegen_emit(codegen, " // Unsupported expression type: %s",
449 ast_node_type_to_string(node->type));
450#else
451 codegen_emit(codegen, " # Unsupported expression type: %s",
452 ast_node_type_to_string(node->type));
453#endif
454 break;
455 }
456}
457
458void codegen_binary_expression(CodeGenerator *codegen, ASTNode *node) {
459 if (node->type != AST_BINARY_OP) return;
460
461 codegen_expression(codegen, node->data.binary_expr.left);
462#if TARGET_ARM64
463 codegen_emit(codegen, " str w0, [sp, #-16]!");
464#else
465 codegen_emit(codegen, " pushq %%rax");
466#endif
467
468 codegen_expression(codegen, node->data.binary_expr.right);
469#if TARGET_ARM64
470 codegen_emit(codegen, " mov w1, w0");
471 codegen_emit(codegen, " ldr w0, [sp], #16");
472#else
473 codegen_emit(codegen, " movq %%rax, %%rbx");
474 codegen_emit(codegen, " popq %%rax");
475#endif
476
477 switch (node->data.binary_expr.operator) {
478 case TOKEN_PLUS:
479#if TARGET_ARM64
480 codegen_emit(codegen, " add w0, w0, w1");
481#else
482 codegen_emit(codegen, " addq %%rbx, %%rax");
483#endif
484 break;
485 case TOKEN_MINUS:
486#if TARGET_ARM64
487 codegen_emit(codegen, " sub w0, w0, w1");
488#else
489 codegen_emit(codegen, " subq %%rbx, %%rax");
490#endif
491 break;
492 case TOKEN_MULTIPLY:
493#if TARGET_ARM64
494 codegen_emit(codegen, " mul w0, w0, w1");
495#else
496 codegen_emit(codegen, " imulq %%rbx, %%rax");
497#endif
498 break;
499 case TOKEN_GREATER:
500#if TARGET_ARM64
501 codegen_emit(codegen, " cmp w0, w1");
502 codegen_emit(codegen, " cset w0, gt");
503#else
504 codegen_emit(codegen, " cmpq %%rbx, %%rax");
505 codegen_emit(codegen, " setg %%al");
506 codegen_emit(codegen, " movzbq %%al, %%rax");
507#endif
508 break;
509 default:
510#if TARGET_ARM64
511 codegen_emit(codegen, " // Unsupported binary operator: %s",
512 token_type_to_string(node->data.binary_expr.operator));
513#else
514 codegen_emit(codegen, " # Unsupported binary operator: %s",
515 token_type_to_string(node->data.binary_expr.operator));
516#endif
517 break;
518 }
519}
520
521void codegen_unary_expression(CodeGenerator *codegen, ASTNode *node) {
522 if (node->type != AST_UNARY_OP) return;
523
524 codegen_expression(codegen, node->data.unary_expr.operand);
525
526 switch (node->data.unary_expr.operator) {
527 case TOKEN_MINUS:
528#if TARGET_ARM64
529 codegen_emit(codegen, " neg w0, w0");
530#else
531 codegen_emit(codegen, " negq %%rax");
532#endif
533 break;
534 case TOKEN_NOT:
535#if TARGET_ARM64
536 codegen_emit(codegen, " cmp w0, #0");
537 codegen_emit(codegen, " cset w0, eq");
538#else
539 codegen_emit(codegen, " testq %%rax, %%rax");
540 codegen_emit(codegen, " setz %%al");
541 codegen_emit(codegen, " movzbq %%al, %%rax");
542#endif
543 break;
544 default:
545#if TARGET_ARM64
546 codegen_emit(codegen, " // Unsupported unary operator: %s",
547 token_type_to_string(node->data.unary_expr.operator));
548#else
549 codegen_emit(codegen, " # Unsupported unary operator: %s",
550 token_type_to_string(node->data.unary_expr.operator));
551#endif
552 break;
553 }
554}
555
556void codegen_call_expression(CodeGenerator *codegen, ASTNode *node) {
557 if (node->type != AST_FUNCTION_CALL) return;
558
559#if TARGET_ARM64
560 // ARM64 calling convention uses x0-x7 for first 8 args
561 for (int i = 0; i < node->data.call_expr.argument_count && i < 8; i++) {
562 codegen_expression(codegen, node->data.call_expr.arguments[i]);
563 if (i > 0) {
564 codegen_emit(codegen, " mov x%d, x0", i);
565 }
566 }
567 codegen_emit(codegen, " bl _%s", node->data.call_expr.function_name);
568#else
569 // x86-64 - push args right to left
570 for (int i = node->data.call_expr.argument_count - 1; i >= 0; i--) {
571 codegen_expression(codegen, node->data.call_expr.arguments[i]);
572 codegen_emit(codegen, " pushq %%rax");
573 }
574 codegen_emit(codegen, " callq _%s", node->data.call_expr.function_name);
575 if (node->data.call_expr.argument_count > 0) {
576 codegen_emit(codegen, " addq $%d, %%rsp",
577 node->data.call_expr.argument_count * 8);
578 }
579#endif
580}
581
582void codegen_identifier(CodeGenerator *codegen, ASTNode *node) {
583 if (node->type != AST_IDENTIFIER) return;
584
585#if TARGET_ARM64
586 codegen_emit(codegen, " // Load variable %s", node->data.identifier.name);
587 codegen_emit(codegen, " ldr w0, [fp, #-8]");
588#else
589 codegen_emit(codegen, " # Load variable %s", node->data.identifier.name);
590 codegen_emit(codegen, " movq -8(%%rbp), %%rax");
591#endif
592}
593
594void codegen_number(CodeGenerator *codegen, ASTNode *node) {
595 if (node->type != AST_NUMBER_LITERAL) return;
596
597#if TARGET_ARM64
598 codegen_emit(codegen, " mov w0, #%d", node->data.number.value);
599#else
600 codegen_emit(codegen, " movq $%d, %%rax", node->data.number.value);
601#endif
602}
603
604void codegen_string(CodeGenerator *codegen, ASTNode *node) {
605 if (node->type != AST_STRING_LITERAL) return;
606
607#if TARGET_ARM64
608 codegen_emit(codegen, " // String literal: \"%s\"", node->data.string.value);
609 codegen_emit(codegen, " adrp x0, string_literal_%d@PAGE", codegen->label_counter);
610 codegen_emit(codegen, " add x0, x0, string_literal_%d@PAGEOFF", codegen->label_counter++);
611#else
612 codegen_emit(codegen, " # String literal: \"%s\"", node->data.string.value);
613 codegen_emit(codegen, " movq $string_literal_%d, %%rax", codegen->label_counter++);
614#endif
615}
616
617void codegen_assignment(CodeGenerator *codegen, ASTNode *node) {
618 if (node->type != AST_ASSIGNMENT) return;
619
620 codegen_expression(codegen, node->data.assignment.value);
621
622#if TARGET_ARM64
623 codegen_emit(codegen, " // Assign to %s", node->data.assignment.variable);
624 codegen_emit(codegen, " str w0, [fp, #-8]");
625#else
626 codegen_emit(codegen, " # Assign to %s", node->data.assignment.variable);
627 codegen_emit(codegen, " movq %%rax, -8(%%rbp)");
628#endif
629}
AST Node structure.
Definition types.h:333
CodeGenerator structure.
Definition types.h:528