diff options
Diffstat (limited to 'src/parser/parser_core.c')
| -rw-r--r-- | src/parser/parser_core.c | 618 |
1 files changed, 618 insertions, 0 deletions
diff --git a/src/parser/parser_core.c b/src/parser/parser_core.c new file mode 100644 index 0000000..1b40cf4 --- /dev/null +++ b/src/parser/parser_core.c @@ -0,0 +1,618 @@ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "parser.h" +#include "zprep.h" + +static ASTNode *generate_derive_impls(ParserContext *ctx, ASTNode *strct, char **traits, int count); + +// Main parsing entry point +ASTNode *parse_program_nodes(ParserContext *ctx, Lexer *l) +{ + ASTNode *h = 0, *tl = 0; + while (1) + { + skip_comments(l); + Token t = lexer_peek(l); + + if (t.type == TOK_EOF) + { + break; + } + + if (t.type == TOK_COMPTIME) + { + ASTNode *gen = parse_comptime(ctx, l); + if (gen) + { + if (!h) + { + h = gen; + } + else + { + tl->next = gen; + } + if (!tl) + { + tl = gen; + } + while (tl->next) + { + tl = tl->next; + } + } + continue; + } + + ASTNode *s = 0; + + int attr_must_use = 0; + int attr_deprecated = 0; + int attr_inline = 0; + int attr_pure = 0; + int attr_noreturn = 0; + int attr_cold = 0; + int attr_hot = 0; + int attr_packed = 0; + int attr_align = 0; + int attr_noinline = 0; + int attr_constructor = 0; + int attr_destructor = 0; + int attr_unused = 0; + int attr_weak = 0; + int attr_export = 0; + int attr_comptime = 0; + char *deprecated_msg = NULL; + char *attr_section = NULL; + + char *derived_traits[32]; + int derived_count = 0; + + while (t.type == TOK_AT) + { + lexer_next(l); + Token attr = lexer_next(l); + if (attr.type != TOK_IDENT && attr.type != TOK_COMPTIME) + { + zpanic("Expected attribute name after @"); + } + + if (0 == strncmp(attr.start, "must_use", 8) && 8 == attr.len) + { + attr_must_use = 1; + } + else if (0 == strncmp(attr.start, "deprecated", 10) && 10 == attr.len) + { + attr_deprecated = 1; + if (lexer_peek(l).type == TOK_LPAREN) + { + lexer_next(l); + Token msg = lexer_next(l); + if (msg.type == TOK_STRING) + { + deprecated_msg = xmalloc(msg.len - 1); + strncpy(deprecated_msg, msg.start + 1, msg.len - 2); + deprecated_msg[msg.len - 2] = 0; + } + if (lexer_next(l).type != TOK_RPAREN) + { + zpanic("Expected ) after deprecated message"); + } + } + } + else if (0 == strncmp(attr.start, "inline", 6) && 6 == attr.len) + { + attr_inline = 1; + } + else if (0 == strncmp(attr.start, "noinline", 8) && 8 == attr.len) + { + attr_noinline = 1; + } + else if (0 == strncmp(attr.start, "pure", 4) && 4 == attr.len) + { + attr_pure = 1; + } + else if (0 == strncmp(attr.start, "noreturn", 8) && 8 == attr.len) + { + attr_noreturn = 1; + } + else if (0 == strncmp(attr.start, "cold", 4) && 4 == attr.len) + { + attr_cold = 1; + } + else if (0 == strncmp(attr.start, "hot", 3) && 3 == attr.len) + { + attr_hot = 1; + } + else if (0 == strncmp(attr.start, "constructor", 11) && 11 == attr.len) + { + attr_constructor = 1; + } + else if (0 == strncmp(attr.start, "destructor", 10) && 10 == attr.len) + { + attr_destructor = 1; + } + else if (0 == strncmp(attr.start, "unused", 6) && 6 == attr.len) + { + attr_unused = 1; + } + else if (0 == strncmp(attr.start, "weak", 4) && 4 == attr.len) + { + attr_weak = 1; + } + else if (0 == strncmp(attr.start, "export", 6) && 6 == attr.len) + { + attr_export = 1; + } + else if (0 == strncmp(attr.start, "comptime", 8) && 8 == attr.len) + { + attr_comptime = 1; + } + else if (0 == strncmp(attr.start, "section", 7) && 7 == attr.len) + { + if (lexer_peek(l).type == TOK_LPAREN) + { + lexer_next(l); + Token sec = lexer_next(l); + if (sec.type == TOK_STRING) + { + attr_section = xmalloc(sec.len - 1); + strncpy(attr_section, sec.start + 1, sec.len - 2); + attr_section[sec.len - 2] = 0; + } + if (lexer_next(l).type != TOK_RPAREN) + { + zpanic("Expected ) after section name"); + } + } + else + { + zpanic("@section requires a name: @section(\"name\")"); + } + } + else if (0 == strncmp(attr.start, "packed", 6) && 6 == attr.len) + { + attr_packed = 1; + } + else if (0 == strncmp(attr.start, "align", 5) && 5 == attr.len) + { + if (lexer_peek(l).type == TOK_LPAREN) + { + lexer_next(l); + Token num = lexer_next(l); + if (num.type == TOK_INT) + { + attr_align = atoi(num.start); + } + if (lexer_next(l).type != TOK_RPAREN) + { + zpanic("Expected ) after align value"); + } + } + else + { + zpanic("@align requires a value: @align(N)"); + } + } + else if (0 == strncmp(attr.start, "derive", 6) && 6 == attr.len) + { + if (lexer_peek(l).type == TOK_LPAREN) + { + lexer_next(l); + while (1) + { + Token t = lexer_next(l); + if (t.type != TOK_IDENT) + { + zpanic("Expected trait name in @derive"); + } + if (derived_count < 32) + { + derived_traits[derived_count++] = token_strdup(t); + } + if (lexer_peek(l).type == TOK_COMMA) + { + lexer_next(l); + } + else + { + break; + } + } + if (lexer_next(l).type != TOK_RPAREN) + { + zpanic("Expected ) after derive traits"); + } + } + else + { + zpanic("@derive requires traits: @derive(Debug, Clone)"); + } + } + else + { + zwarn_at(attr, "Unknown attribute: %.*s", attr.len, attr.start); + } + + t = lexer_peek(l); + } + + if (t.type == TOK_PREPROC) + { + lexer_next(l); + char *content = xmalloc(t.len + 2); + strncpy(content, t.start, t.len); + content[t.len] = '\n'; + content[t.len + 1] = 0; + s = ast_create(NODE_RAW_STMT); + s->raw_stmt.content = content; + } + else if (t.type == TOK_IDENT) + { + // Inline function: inline fn name(...) { } + if (0 == strncmp(t.start, "inline", 6) && 6 == t.len) + { + lexer_next(l); + Token next = lexer_peek(l); + if (next.type == TOK_IDENT && 2 == next.len && 0 == strncmp(next.start, "fn", 2)) + { + s = parse_function(ctx, l, 0); + attr_inline = 1; + } + else + { + zpanic_at(next, "Expected 'fn' after 'inline'"); + } + } + else if (0 == strncmp(t.start, "fn", 2) && 2 == t.len) + { + s = parse_function(ctx, l, 0); + } + else if (0 == strncmp(t.start, "struct", 6) && 6 == t.len) + { + s = parse_struct(ctx, l, 0); + if (s && s->type == NODE_STRUCT) + { + s->strct.is_packed = attr_packed; + s->strct.align = attr_align; + + if (derived_count > 0) + { + ASTNode *impls = + generate_derive_impls(ctx, s, derived_traits, derived_count); + s->next = impls; + } + } + } + else if (0 == strncmp(t.start, "enum", 4) && 4 == t.len) + { + s = parse_enum(ctx, l); + if (s && s->type == NODE_ENUM) + { + if (derived_count > 0) + { + ASTNode *impls = + generate_derive_impls(ctx, s, derived_traits, derived_count); + s->next = impls; + } + } + } + else if (t.len == 4 && strncmp(t.start, "impl", 4) == 0) + { + s = parse_impl(ctx, l); + } + else if (t.len == 5 && strncmp(t.start, "trait", 5) == 0) + { + s = parse_trait(ctx, l); + } + else if (t.len == 7 && strncmp(t.start, "include", 7) == 0) + { + s = parse_include(ctx, l); + } + else if (t.len == 6 && strncmp(t.start, "import", 6) == 0) + { + s = parse_import(ctx, l); + } + else if (t.len == 3 && strncmp(t.start, "var", 3) == 0) + { + s = parse_var_decl(ctx, l); + } + else if (t.len == 5 && strncmp(t.start, "const", 5) == 0) + { + s = parse_const(ctx, l); + } + else if (t.len == 6 && strncmp(t.start, "extern", 6) == 0) + { + lexer_next(l); + + Token peek = lexer_peek(l); + if (peek.type == TOK_IDENT && peek.len == 2 && strncmp(peek.start, "fn", 2) == 0) + { + s = parse_function(ctx, l, 0); + } + else + { + while (1) + { + Token sym = lexer_next(l); + if (sym.type != TOK_IDENT) + { + break; + } + + char *name = token_strdup(sym); + register_extern_symbol(ctx, name); + + Token next = lexer_peek(l); + if (next.type == TOK_COMMA) + { + lexer_next(l); + } + else + { + break; + } + } + + if (lexer_peek(l).type == TOK_SEMICOLON) + { + lexer_next(l); + } + continue; + } + } + else if (0 == strncmp(t.start, "type", 4) && 4 == t.len) + { + s = parse_type_alias(ctx, l); + } + else if (0 == strncmp(t.start, "raw", 3) && 3 == t.len) + { + lexer_next(l); + if (lexer_peek(l).type != TOK_LBRACE) + { + zpanic("Expected { after raw"); + } + lexer_next(l); + + const char *start = l->src + l->pos; + + int depth = 1; + while (depth > 0) + { + Token t = lexer_next(l); + if (t.type == TOK_EOF) + { + zpanic("Unexpected EOF in raw block"); + } + if (t.type == TOK_LBRACE) + { + depth++; + } + if (t.type == TOK_RBRACE) + { + depth--; + } + } + + const char *end = l->src + l->pos - 1; + size_t len = end - start; + + char *content = xmalloc(len + 1); + memcpy(content, start, len); + content[len] = 0; + + s = ast_create(NODE_RAW_STMT); + s->raw_stmt.content = content; + } + else + { + lexer_next(l); + } + } + else if (t.type == TOK_ASYNC) + { + lexer_next(l); + Token next = lexer_peek(l); + if (0 == strncmp(next.start, "fn", 2) && 2 == next.len) + { + s = parse_function(ctx, l, 1); + if (s) + { + s->func.is_async = 1; + } + } + else + { + zpanic_at(next, "Expected 'fn' after 'async'"); + } + } + + else if (t.type == TOK_UNION) + { + s = parse_struct(ctx, l, 1); + } + else if (t.type == TOK_TRAIT) + { + s = parse_trait(ctx, l); + } + else if (t.type == TOK_IMPL) + { + s = parse_impl(ctx, l); + } + else if (t.type == TOK_TEST) + { + s = parse_test(ctx, l); + } + else + { + lexer_next(l); + } + + if (s && s->type == NODE_FUNCTION) + { + s->func.must_use = attr_must_use; + s->func.is_inline = attr_inline || s->func.is_inline; + s->func.noinline = attr_noinline; + s->func.constructor = attr_constructor; + s->func.destructor = attr_destructor; + s->func.unused = attr_unused; + s->func.weak = attr_weak; + s->func.is_export = attr_export; + s->func.cold = attr_cold; + s->func.hot = attr_hot; + s->func.noreturn = attr_noreturn; + s->func.pure = attr_pure; + s->func.section = attr_section; + s->func.is_comptime = attr_comptime; + + if (attr_deprecated && s->func.name) + { + register_deprecated_func(ctx, s->func.name, deprecated_msg); + } + + if (attr_must_use && s->func.name) + { + FuncSig *sig = find_func(ctx, s->func.name); + if (sig) + { + sig->must_use = 1; + } + } + } + + if (s) + { + if (!h) + { + h = s; + } + else + { + tl->next = s; + } + tl = s; + while (tl->next) + { + tl = tl->next; + } + } + } + return h; +} + +ASTNode *parse_program(ParserContext *ctx, Lexer *l) +{ + g_parser_ctx = ctx; + enter_scope(ctx); + register_builtins(ctx); + + ASTNode *r = ast_create(NODE_ROOT); + r->root.children = parse_program_nodes(ctx, l); + return r; +} + +static ASTNode *generate_derive_impls(ParserContext *ctx, ASTNode *strct, char **traits, int count) +{ + ASTNode *head = NULL, *tail = NULL; + char *name = strct->strct.name; + + for (int i = 0; i < count; i++) + { + char *trait = traits[i]; + char *code = NULL; + + if (0 == strcmp(trait, "Clone")) + { + code = xmalloc(1024); + sprintf(code, "impl %s { fn clone(self) -> %s { return *self; } }", name, name); + } + else if (0 == strcmp(trait, "Eq")) + { + char body[4096]; + body[0] = 0; + + if (strct->type == NODE_ENUM) + { + // Simple Enum equality (tag comparison) + // Generate Eq impl for Enum + + sprintf(body, "return self.tag == other.tag;"); + } + else + { + ASTNode *f = strct->strct.fields; + int first = 1; + strcat(body, "return "); + while (f) + { + if (f->type == NODE_FIELD) + { + char *fn = f->field.name; + char *ft = f->field.type; + if (!first) + { + strcat(body, " && "); + } + char cmp[256]; + + ASTNode *fdef = find_struct_def(ctx, ft); + if (fdef && fdef->type == NODE_ENUM) + { + // Enum field: compare tags + sprintf(cmp, "self.%s.tag == other.%s.tag", fn, fn); + } + else if (fdef && fdef->type == NODE_STRUCT) + { + // Struct field: use _eq function + sprintf(cmp, "%s_eq(&self.%s, other.%s)", ft, fn, fn); + } + else + { + // Primitive or unknown: use == + sprintf(cmp, "self.%s == other.%s", fn, fn); + } + strcat(body, cmp); + first = 0; + } + f = f->next; + } + if (first) + { + strcat(body, "true"); + } + strcat(body, ";"); + } + code = xmalloc(4096 + 1024); + sprintf(code, "impl %s { fn eq(self, other: %s) -> bool { %s } }", name, name, body); + } + else if (0 == strcmp(trait, "Debug")) + { + // Simplistic Debug for now, I know. + code = xmalloc(1024); + sprintf(code, "impl %s { fn to_string(self) -> char* { return \"%s { ... }\"; } }", + name, name); + } + + if (code) + { + Lexer tmp; + lexer_init(&tmp, code); + ASTNode *impl = parse_impl(ctx, &tmp); + if (impl) + { + if (!head) + { + head = impl; + } + else + { + tail->next = impl; + } + tail = impl; + } + } + } + return head; +} |
