KCC - Kayte C Compiler 1.10.0
A C compiler implementation with preprocessor, lexer, parser, and code generator
Loading...
Searching...
No Matches
parser.c
1#include <stdio.h>
2#include "kcc.h"
3#include "parser.h"
4
5Parser *parser_create(Lexer *lexer) {
6 Parser *parser = malloc(sizeof(Parser));
7 if (!parser) {
8 error_fatal("Memory allocation failed for parser");
9 return NULL;
10 }
11
12 parser->lexer = lexer;
13 parser->current_token = lexer_next_token(lexer);
14 parser->peek_token = lexer_next_token(lexer);
15
16 return parser;
17}
18
19void parser_destroy(Parser *parser) {
20 if (parser) {
21 free(parser);
22 }
23}
24
25void parser_advance(Parser *parser) {
26 parser->current_token = parser->peek_token;
27 parser->peek_token = lexer_next_token(parser->lexer);
28}
29
30bool parser_match(Parser *parser, TokenType type) {
31 return parser->current_token.type == type;
32}
33
34bool parser_expect(Parser *parser, TokenType type) {
35 if (parser_match(parser, type)) {
36 parser_advance(parser);
37 return true;
38 } else {
39 error_syntax(parser->current_token.line, parser->current_token.column,
40 "Expected %s, got %s",
41 token_type_to_string(type),
42 token_type_to_string(parser->current_token.type));
43 return false;
44 }
45}
46
47bool parser_is_type_specifier(TokenType type) {
48 return type == TOKEN_INT || type == TOKEN_CHAR_KW || type == TOKEN_VOID;
49}
50
51DataType parser_parse_type_specifier(Parser *parser) {
52 DataType type = token_to_data_type(parser->current_token.type);
53 if (type != TYPE_UNKNOWN) {
54 parser_advance(parser);
55 return type;
56 }
57
58 error_syntax(parser->current_token.line, parser->current_token.column,
59 "Expected type specifier");
60 return TYPE_UNKNOWN;
61}
62
63// Helper function to convert TokenType to DataType
64DataType token_type_to_data_type(TokenType type) {
65 switch (type) {
66 case TOKEN_INT: return TYPE_INT;
67 case TOKEN_CHAR_KW: return TYPE_CHAR;
68 case TOKEN_VOID: return TYPE_VOID;
69 default: return TYPE_UNKNOWN;
70 }
71}
72
73ASTNode *parser_parse_program(Parser *parser) {
74 ASTNode *program = ast_create_program();
75
76 while (!parser_match(parser, TOKEN_EOF)) {
77 // Skip any UNKNOWN tokens
78 if (parser->current_token.type == TOKEN_UNKNOWN) {
79 parser_advance(parser);
80 continue;
81 }
82
83 ASTNode *declaration = parser_parse_declaration(parser);
84
85 if (declaration) {
86 ast_add_declaration(program, declaration);
87 } else {
88 // Skip to next potential declaration
89 while (!parser_match(parser, TOKEN_EOF) &&
90 !parser_is_type_specifier(parser->current_token.type)) {
91 parser_advance(parser);
92 }
93 }
94 }
95
96 return program;
97}
98
99ASTNode *parser_parse_declaration(Parser *parser) {
100 // Parse type specifier (int, char, etc.)
101 if (!parser_is_type_specifier(parser->current_token.type)) {
102 // Don't call parser_error if it doesn't exist - handle gracefully
103 return NULL;
104 }
105
106 TokenType type_token = parser->current_token.type;
107 DataType data_type = token_type_to_data_type(type_token);
108 parser_advance(parser);
109
110 // Parse identifier
111 if (parser->current_token.type != TOKEN_IDENTIFIER) {
112 return NULL;
113 }
114
115 char *name = strdup(parser->current_token.value);
116 parser_advance(parser);
117
118 // Check what comes after the identifier
119 if (parser->current_token.type == TOKEN_LPAREN) {
120 // This is a function (declaration or definition)
121 ASTNode *result = parser_parse_function(parser, data_type, name);
122 free(name);
123 return result;
124 } else if (parser->current_token.type == TOKEN_SEMICOLON) {
125 // This is a variable declaration
126 parser_advance(parser); // consume semicolon
127 ASTNode *result = parser_create_variable_declaration(data_type, name);
128 free(name);
129 return result;
130 } else {
131 // Handle error gracefully
132 free(name);
133 return NULL;
134 }
135}
136
137ASTNode *parser_parse_function(Parser *parser, DataType return_type, char *name) {
138 // Expect '('
139 if (parser->current_token.type != TOKEN_LPAREN) {
140 return NULL;
141 }
142 parser_advance(parser);
143
144 // Parse parameters (for now, just handle empty parameter list)
145 if (parser->current_token.type != TOKEN_RPAREN) {
146 // Skip parameters for now - advance until we find ')'
147 while (parser->current_token.type != TOKEN_RPAREN &&
148 parser->current_token.type != TOKEN_EOF) {
149 parser_advance(parser);
150 }
151 }
152
153 if (parser->current_token.type == TOKEN_RPAREN) {
154 parser_advance(parser); // consume ')'
155 }
156
157 // Check if this is a declaration or definition
158 if (parser->current_token.type == TOKEN_SEMICOLON) {
159 // Function declaration: int func();
160 parser_advance(parser);
161 return parser_create_function_declaration(return_type, name);
162 } else if (parser->current_token.type == TOKEN_LBRACE) {
163 // Function definition: int func() { ... }
164 return parser_parse_function_definition(parser, return_type, name);
165 } else {
166 return NULL;
167 }
168}
169
170ASTNode *parser_parse_function_definition(Parser *parser, DataType return_type, char *name) {
171 // Parse the function body
172 ASTNode *body = parser_parse_compound_statement(parser);
173 if (!body) {
174 return NULL;
175 }
176
177 // Create a function declaration node with body
178 ASTNode *func = parser_create_function_declaration(return_type, name);
179 if (func) {
180 // If your AST supports function definitions, set the body here
181 // This depends on your AST structure
182 // func->data.function.body = body; // Adjust according to your AST
183 }
184
185 return func;
186}
187
188// Helper function to create function declaration
189ASTNode *parser_create_function_declaration(DataType return_type, const char *name) {
190 // This should match your AST creation functions
191 // Adjust according to your actual AST API
192 return ast_create_function_decl(return_type, name, NULL, NULL);
193}
194
195// Helper function to create variable declaration
196ASTNode *parser_create_variable_declaration(DataType type, const char *name) {
197 // This should match your AST creation functions
198 // Adjust according to your actual AST API
199 return ast_create_var_decl(type, name, NULL);
200}
201
202ASTNode *parser_parse_variable_declaration(Parser *parser, DataType var_type) {
203 ASTNode *initializer = NULL;
204
205 if (parser_match(parser, TOKEN_ASSIGN)) {
206 parser_advance(parser);
207 initializer = parser_parse_expression(parser);
208 }
209
210 parser_expect(parser, TOKEN_SEMICOLON);
211
212 return ast_create_var_decl(var_type, "", initializer);
213}
214
215ASTNode *parser_parse_statement(Parser *parser) {
216 switch (parser->current_token.type) {
217 case TOKEN_LBRACE:
218 return parser_parse_compound_statement(parser);
219 case TOKEN_RETURN:
220 return parser_parse_return_statement(parser);
221 case TOKEN_IF:
222 return parser_parse_if_statement(parser);
223 case TOKEN_WHILE:
224 return parser_parse_while_statement(parser);
225 case TOKEN_FOR:
226 return parser_parse_for_statement(parser);
227 case TOKEN_BREAK:
228 parser_advance(parser);
229 parser_expect(parser, TOKEN_SEMICOLON);
230 return ast_create_break_stmt();
231 case TOKEN_CONTINUE:
232 parser_advance(parser);
233 parser_expect(parser, TOKEN_SEMICOLON);
234 return ast_create_continue_stmt();
235 default:
236 return parser_parse_expression_statement(parser);
237 }
238}
239
240ASTNode *parser_parse_compound_statement(Parser *parser) {
241 ASTNode *compound = ast_create_compound_stmt();
242
243 parser_expect(parser, TOKEN_LBRACE);
244
245 while (!parser_match(parser, TOKEN_RBRACE) && !parser_match(parser, TOKEN_EOF)) {
246 ASTNode *stmt = NULL;
247
248 if (parser_is_type_specifier(parser->current_token.type)) {
249 // Variable declaration
250 DataType var_type = parser_parse_type_specifier(parser);
251
252 if (parser_match(parser, TOKEN_IDENTIFIER)) {
253 char *var_name = strdup(parser->current_token.value);
254 parser_advance(parser);
255
256 ASTNode *var_decl = parser_parse_variable_declaration(parser, var_type);
257 if (var_decl) {
258 free(var_decl->data.var_decl.name);
259 var_decl->data.var_decl.name = var_name;
260 stmt = var_decl;
261 } else {
262 free(var_name);
263 }
264 }
265 } else {
266 stmt = parser_parse_statement(parser);
267 }
268
269 if (stmt) {
270 ast_add_statement(compound, stmt);
271 }
272 }
273
274 parser_expect(parser, TOKEN_RBRACE);
275
276 return compound;
277}
278
279ASTNode *parser_parse_expression_statement(Parser *parser) {
280 ASTNode *expr = parser_parse_expression(parser);
281 parser_expect(parser, TOKEN_SEMICOLON);
282 return ast_create_expression_stmt(expr);
283}
284
285ASTNode *parser_parse_return_statement(Parser *parser) {
286 parser_advance(parser); // consume 'return'
287
288 ASTNode *expr = NULL;
289 if (!parser_match(parser, TOKEN_SEMICOLON)) {
290 expr = parser_parse_expression(parser);
291 }
292
293 parser_expect(parser, TOKEN_SEMICOLON);
294
295 return ast_create_return_stmt(expr);
296}
297
298ASTNode *parser_parse_if_statement(Parser *parser) {
299 parser_advance(parser); // consume 'if'
300
301 parser_expect(parser, TOKEN_LPAREN);
302 ASTNode *condition = parser_parse_expression(parser);
303 parser_expect(parser, TOKEN_RPAREN);
304
305 ASTNode *then_stmt = parser_parse_statement(parser);
306
307 ASTNode *else_stmt = NULL;
308 if (parser_match(parser, TOKEN_ELSE)) {
309 parser_advance(parser);
310 else_stmt = parser_parse_statement(parser);
311 }
312
313 return ast_create_if_stmt(condition, then_stmt, else_stmt);
314}
315
316ASTNode *parser_parse_while_statement(Parser *parser) {
317 parser_advance(parser); // consume 'while'
318
319 parser_expect(parser, TOKEN_LPAREN);
320 ASTNode *condition = parser_parse_expression(parser);
321 parser_expect(parser, TOKEN_RPAREN);
322
323 ASTNode *body = parser_parse_statement(parser);
324
325 return ast_create_while_stmt(condition, body);
326}
327
328ASTNode *parser_parse_for_statement(Parser *parser) {
329 parser_advance(parser); // consume 'for'
330
331 parser_expect(parser, TOKEN_LPAREN);
332
333 ASTNode *init = NULL;
334 if (!parser_match(parser, TOKEN_SEMICOLON)) {
335 init = parser_parse_expression(parser);
336 }
337 parser_expect(parser, TOKEN_SEMICOLON);
338
339 ASTNode *condition = NULL;
340 if (!parser_match(parser, TOKEN_SEMICOLON)) {
341 condition = parser_parse_expression(parser);
342 }
343 parser_expect(parser, TOKEN_SEMICOLON);
344
345 ASTNode *update = NULL;
346 if (!parser_match(parser, TOKEN_RPAREN)) {
347 update = parser_parse_expression(parser);
348 }
349 parser_expect(parser, TOKEN_RPAREN);
350
351 ASTNode *body = parser_parse_statement(parser);
352
353 return ast_create_for_stmt(init, condition, update, body);
354}
355
356ASTNode *parser_parse_expression(Parser *parser) {
357 return parser_parse_assignment_expression(parser);
358}
359
360ASTNode *parser_parse_assignment_expression(Parser *parser) {
361 ASTNode *left = parser_parse_logical_or_expression(parser);
362
363 if (parser_match(parser, TOKEN_ASSIGN)) {
364 parser_advance(parser);
365 ASTNode *right = parser_parse_assignment_expression(parser);
366
367 if (left->type == AST_IDENTIFIER) {
368 return ast_create_assignment(left->data.identifier.name, right);
369 } else {
370 error_syntax(parser->current_token.line, parser->current_token.column,
371 "Invalid left-hand side in assignment");
372 }
373 }
374
375 return left;
376}
377
378ASTNode *parser_parse_logical_or_expression(Parser *parser) {
379 ASTNode *left = parser_parse_logical_and_expression(parser);
380
381 while (parser_match(parser, TOKEN_OR)) {
382 TokenType op = parser->current_token.type;
383 parser_advance(parser);
384 ASTNode *right = parser_parse_logical_and_expression(parser);
385 left = ast_create_binary_expr(op, left, right);
386 }
387
388 return left;
389}
390
391ASTNode *parser_parse_logical_and_expression(Parser *parser) {
392 ASTNode *left = parser_parse_equality_expression(parser);
393
394 while (parser_match(parser, TOKEN_AND)) {
395 TokenType op = parser->current_token.type;
396 parser_advance(parser);
397 ASTNode *right = parser_parse_equality_expression(parser);
398 left = ast_create_binary_expr(op, left, right);
399 }
400
401 return left;
402}
403
404ASTNode *parser_parse_equality_expression(Parser *parser) {
405 ASTNode *left = parser_parse_relational_expression(parser);
406
407 while (parser_match(parser, TOKEN_EQUAL) || parser_match(parser, TOKEN_NOT_EQUAL)) {
408 TokenType op = parser->current_token.type;
409 parser_advance(parser);
410 ASTNode *right = parser_parse_relational_expression(parser);
411 left = ast_create_binary_expr(op, left, right);
412 }
413
414 return left;
415}
416
417ASTNode *parser_parse_relational_expression(Parser *parser) {
418 ASTNode *left = parser_parse_additive_expression(parser);
419
420 while (parser_match(parser, TOKEN_LESS) || parser_match(parser, TOKEN_LESS_EQUAL) ||
421 parser_match(parser, TOKEN_GREATER) || parser_match(parser, TOKEN_GREATER_EQUAL)) {
422 TokenType op = parser->current_token.type;
423 parser_advance(parser);
424 ASTNode *right = parser_parse_additive_expression(parser);
425 left = ast_create_binary_expr(op, left, right);
426 }
427
428 return left;
429}
430
431ASTNode *parser_parse_additive_expression(Parser *parser) {
432 ASTNode *left = parser_parse_multiplicative_expression(parser);
433
434 while (parser_match(parser, TOKEN_PLUS) || parser_match(parser, TOKEN_MINUS)) {
435 TokenType op = parser->current_token.type;
436 parser_advance(parser);
437 ASTNode *right = parser_parse_multiplicative_expression(parser);
438 left = ast_create_binary_expr(op, left, right);
439 }
440
441 return left;
442}
443
444ASTNode *parser_parse_multiplicative_expression(Parser *parser) {
445 ASTNode *left = parser_parse_unary_expression(parser);
446
447 while (parser_match(parser, TOKEN_MULTIPLY) || parser_match(parser, TOKEN_DIVIDE) ||
448 parser_match(parser, TOKEN_MODULO)) {
449 TokenType op = parser->current_token.type;
450 parser_advance(parser);
451 ASTNode *right = parser_parse_unary_expression(parser);
452 left = ast_create_binary_expr(op, left, right);
453 }
454
455 return left;
456}
457
458ASTNode *parser_parse_unary_expression(Parser *parser) {
459 if (parser_match(parser, TOKEN_MINUS) || parser_match(parser, TOKEN_NOT)) {
460 TokenType op = parser->current_token.type;
461 parser_advance(parser);
462 ASTNode *operand = parser_parse_unary_expression(parser);
463 return ast_create_unary_expr(op, operand);
464 }
465
466 return parser_parse_primary_expression(parser);
467}
468
469ASTNode *parser_parse_primary_expression(Parser *parser) {
470 ASTNode *primary = NULL;
471
472 switch (parser->current_token.type) {
473 case TOKEN_NUMBER: {
474 int value = atoi(parser->current_token.value);
475 primary = ast_create_number(value);
476 parser_advance(parser);
477 break;
478 }
479 case TOKEN_STRING: {
480 primary = ast_create_string(parser->current_token.value);
481 parser_advance(parser);
482 break;
483 }
484 case TOKEN_IDENTIFIER: {
485 primary = ast_create_identifier(parser->current_token.value);
486 parser_advance(parser);
487 break;
488 }
489 case TOKEN_LPAREN: {
490 parser_advance(parser);
491 primary = parser_parse_expression(parser);
492 parser_expect(parser, TOKEN_RPAREN);
493 break;
494 }
495 default:
496 error_syntax(parser->current_token.line, parser->current_token.column,
497 "Expected primary expression");
498 return NULL;
499 }
500
501 // Handle function calls
502 if (primary && primary->type == AST_IDENTIFIER && parser_match(parser, TOKEN_LPAREN)) {
503 primary = parser_parse_call_expression(parser, primary);
504 }
505
506 return primary;
507}
508
509ASTNode *parser_parse_call_expression(Parser *parser, ASTNode *primary) {
510 if (primary->type != AST_IDENTIFIER) {
511 return primary;
512 }
513
514 ASTNode *call = ast_create_call_expr(primary->data.identifier.name);
515
516 parser_advance(parser); // consume '('
517
518 if (!parser_match(parser, TOKEN_RPAREN)) {
519 do {
520 ASTNode *arg = parser_parse_expression(parser);
521 if (arg) {
522 ast_add_argument(call, arg);
523 }
524
525 if (parser_match(parser, TOKEN_COMMA)) {
526 parser_advance(parser);
527 } else {
528 break;
529 }
530 } while (true);
531 }
532
533 parser_expect(parser, TOKEN_RPAREN);
534
535 // Clean up the original identifier node
536 ast_destroy(primary);
537
538 return call;
539}
AST Node structure.
Definition types.h:333
Lexer structure.
Definition types.h:499
Parser structure.
Definition types.h:516
TokenType
Token types for lexical analysis.
Definition types.h:24
DataType
Data types supported by the compiler.
Definition types.h:193