diff options
| author | Zuhaitz Méndez Fernández de Aránguiz <zuhaitz@debian> | 2026-01-19 12:53:47 +0000 |
|---|---|---|
| committer | Zuhaitz Méndez Fernández de Aránguiz <zuhaitz@debian> | 2026-01-19 12:53:47 +0000 |
| commit | 639c6ac65a1bd44b2ba0725fe7016a4920bf0950 (patch) | |
| tree | 47703f960633d3d4580022583134c28b96d5f36e /src | |
| parent | 526b7748cafcb5a00f8e30df88661f6059d79843 (diff) | |
Iterables and iterators :D
Diffstat (limited to 'src')
| -rw-r--r-- | src/ast/ast.c | 1 | ||||
| -rw-r--r-- | src/ast/ast.h | 10 | ||||
| -rw-r--r-- | src/codegen/codegen.c | 2 | ||||
| -rw-r--r-- | src/codegen/codegen.h | 1 | ||||
| -rw-r--r-- | src/codegen/codegen_decl.c | 38 | ||||
| -rw-r--r-- | src/codegen/codegen_main.c | 5 | ||||
| -rw-r--r-- | src/parser/parser_stmt.c | 302 | ||||
| -rw-r--r-- | src/parser/parser_utils.c | 42 |
8 files changed, 331 insertions, 70 deletions
diff --git a/src/ast/ast.c b/src/ast/ast.c index dee2eed..712f6e3 100644 --- a/src/ast/ast.c +++ b/src/ast/ast.c @@ -52,6 +52,7 @@ void ast_free(ASTNode *node) Type *type_new(TypeKind kind) { Type *t = xmalloc(sizeof(Type)); + memset(t, 0, sizeof(Type)); t->kind = kind; t->name = NULL; t->inner = NULL; diff --git a/src/ast/ast.h b/src/ast/ast.h index cef68c6..2233b09 100644 --- a/src/ast/ast.h +++ b/src/ast/ast.h @@ -54,13 +54,17 @@ typedef struct Type struct Type **args; // For GENERIC args. int arg_count; int is_const; - int is_explicit_struct; // e.g. "struct Foo" vs "Foo" + int is_explicit_struct; // for example, "struct Foo" vs "Foo" union { int array_size; // For fixed-size arrays [T; N]. int is_varargs; // For function types (...). int is_restrict; // For restrict pointers. - int has_drop; // For RAII: does this type implement Drop? + struct + { + int has_drop; // For RAII: does this type implement Drop? + int has_iterable; // For the for iterator: does the type implement Iterable? + } traits; }; } Type; @@ -404,6 +408,8 @@ struct ASTNode { char *name; ASTNode *methods; + char **generic_params; + int generic_param_count; } trait; struct diff --git a/src/codegen/codegen.c b/src/codegen/codegen.c index a371548..01c8204 100644 --- a/src/codegen/codegen.c +++ b/src/codegen/codegen.c @@ -1699,7 +1699,7 @@ void codegen_node_single(ParserContext *ctx, ASTNode *node, FILE *out) { // Cleanup attribute ASTNode *def = find_struct_def(ctx, tname); - if (def && def->type_info && def->type_info->has_drop) + if (def && def->type_info && def->type_info->traits.has_drop) { fprintf(out, "__attribute__((cleanup(%s__Drop_glue))) ", tname); } diff --git a/src/codegen/codegen.h b/src/codegen/codegen.h index c6e2836..f8fe318 100644 --- a/src/codegen/codegen.h +++ b/src/codegen/codegen.h @@ -29,6 +29,7 @@ void emit_func_signature(FILE *out, ASTNode *func, const char *name_override); void emit_preamble(ParserContext *ctx, FILE *out); void emit_includes_and_aliases(ASTNode *node, FILE *out); void emit_type_aliases(ASTNode *node, FILE *out); +void emit_global_aliases(ParserContext *ctx, FILE *out); void emit_struct_defs(ParserContext *ctx, ASTNode *node, FILE *out); void emit_trait_defs(ASTNode *node, FILE *out); void emit_enum_protos(ASTNode *node, FILE *out); diff --git a/src/codegen/codegen_decl.c b/src/codegen/codegen_decl.c index 6f9a1cb..8ada7d6 100644 --- a/src/codegen/codegen_decl.c +++ b/src/codegen/codegen_decl.c @@ -225,6 +225,16 @@ void emit_type_aliases(ASTNode *node, FILE *out) } } +void emit_global_aliases(ParserContext *ctx, FILE *out) +{ + TypeAlias *ta = ctx->type_aliases; + while (ta) + { + fprintf(out, "typedef %s %s;\n", ta->original_type, ta->alias); + ta = ta->next; + } +} + // Emit enum constructor prototypes void emit_enum_protos(ASTNode *node, FILE *out) { @@ -421,6 +431,11 @@ void emit_trait_defs(ASTNode *node, FILE *out) { if (node->type == NODE_TRAIT) { + if (node->trait.generic_param_count > 0) + { + node = node->next; + continue; + } fprintf(out, "typedef struct %s_VTable {\n", node->trait.name); ASTNode *m = node->trait.methods; while (m) @@ -751,6 +766,29 @@ void emit_impl_vtables(ParserContext *ctx, FILE *out) if (node && node->type == NODE_IMPL_TRAIT) { char *trait = node->impl_trait.trait_name; + + // Filter generic traits (VTables for them are not emitted) + int is_generic_trait = 0; + StructRef *search = ctx->parsed_globals_list; + while (search) + { + if (search->node && search->node->type == NODE_TRAIT && + strcmp(search->node->trait.name, trait) == 0) + { + if (search->node->trait.generic_param_count > 0) + { + is_generic_trait = 1; + } + break; + } + search = search->next; + } + if (is_generic_trait) + { + ref = ref->next; + continue; + } + char *strct = node->impl_trait.target_type; // Filter templates diff --git a/src/codegen/codegen_main.c b/src/codegen/codegen_main.c index d5d6bbc..7382827 100644 --- a/src/codegen/codegen_main.c +++ b/src/codegen/codegen_main.c @@ -334,6 +334,8 @@ void codegen_node(ParserContext *ctx, ASTNode *node, FILE *out) print_type_defs(ctx, out, sorted); emit_enum_protos(sorted, out); + emit_global_aliases(ctx, out); // Emit ALL aliases (including imports) + emit_type_aliases(kids, out); // Emit local aliases (redundant but safe) emit_trait_defs(kids, out); // First pass: emit ONLY preprocessor directives before struct defs @@ -382,8 +384,7 @@ void codegen_node(ParserContext *ctx, ASTNode *node, FILE *out) raw_iter = raw_iter->next; } - // Emit type aliases after struct defs (so aliased generic types exist) - emit_type_aliases(kids, out); + // Emit type aliases was here (moved up) ASTNode *merged_globals = NULL; // Head diff --git a/src/parser/parser_stmt.c b/src/parser/parser_stmt.c index 8480716..df490b7 100644 --- a/src/parser/parser_stmt.c +++ b/src/parser/parser_stmt.c @@ -1586,61 +1586,206 @@ ASTNode *parse_for(ParserContext *ctx, Lexer *l) if (in_tok.type == TOK_IDENT && strncmp(in_tok.start, "in", 2) == 0) { ASTNode *start_expr = parse_expression(ctx, l); - int is_inclusive = 0; - if (lexer_peek(l).type == TOK_DOTDOT || lexer_peek(l).type == TOK_DOTDOT_LT) + // Check for Range Loop (.. or ..= or ..<) + TokenType next_tok = lexer_peek(l).type; + if (next_tok == TOK_DOTDOT || next_tok == TOK_DOTDOT_LT || next_tok == TOK_DOTDOT_EQ) { - lexer_next(l); // consume .. or ..< - } - - else if (lexer_peek(l).type == TOK_DOTDOT_EQ) - { - is_inclusive = 1; - lexer_next(l); // consume ..= - } - - if (1) // Block to keep scope for variables - { - ASTNode *end_expr = parse_expression(ctx, l); - - ASTNode *n = ast_create(NODE_FOR_RANGE); - n->for_range.var_name = xmalloc(var.len + 1); - strncpy(n->for_range.var_name, var.start, var.len); - n->for_range.var_name[var.len] = 0; - n->for_range.start = start_expr; - n->for_range.end = end_expr; - n->for_range.is_inclusive = is_inclusive; - - if (lexer_peek(l).type == TOK_IDENT && strncmp(lexer_peek(l).start, "step", 4) == 0) + int is_inclusive = 0; + if (next_tok == TOK_DOTDOT || next_tok == TOK_DOTDOT_LT) { - lexer_next(l); - Token s_tok = lexer_next(l); - char *sval = xmalloc(s_tok.len + 1); - strncpy(sval, s_tok.start, s_tok.len); - sval[s_tok.len] = 0; - n->for_range.step = sval; + lexer_next(l); // consume .. or ..< } - else + else if (next_tok == TOK_DOTDOT_EQ) { - n->for_range.step = NULL; + is_inclusive = 1; + lexer_next(l); // consume ..= } - // Fix: Enter scope to register loop variable + if (1) // Block to keep scope for variables + { + ASTNode *end_expr = parse_expression(ctx, l); + + ASTNode *n = ast_create(NODE_FOR_RANGE); + n->for_range.var_name = xmalloc(var.len + 1); + strncpy(n->for_range.var_name, var.start, var.len); + n->for_range.var_name[var.len] = 0; + n->for_range.start = start_expr; + n->for_range.end = end_expr; + n->for_range.is_inclusive = is_inclusive; + + if (lexer_peek(l).type == TOK_IDENT && + strncmp(lexer_peek(l).start, "step", 4) == 0) + { + lexer_next(l); + Token s_tok = lexer_next(l); + char *sval = xmalloc(s_tok.len + 1); + strncpy(sval, s_tok.start, s_tok.len); + sval[s_tok.len] = 0; + n->for_range.step = sval; + } + else + { + n->for_range.step = NULL; + } + + enter_scope(ctx); + add_symbol(ctx, n->for_range.var_name, "int", type_new(TYPE_INT)); + + if (lexer_peek(l).type == TOK_LBRACE) + { + n->for_range.body = parse_block(ctx, l); + } + else + { + n->for_range.body = parse_statement(ctx, l); + } + exit_scope(ctx); + + return n; + } + } + else + { + // Iterator Loop: for x in obj + // Desugar to: + /* + { + var __it = obj.iterator(); + while (true) { + var __opt = __it.next(); + if (__opt.is_none()) break; + var x = __opt.unwrap(); + <body...> + } + } + */ + + char *var_name = xmalloc(var.len + 1); + strncpy(var_name, var.start, var.len); + var_name[var.len] = 0; + + ASTNode *obj_expr = start_expr; + + // var __it = obj.iterator(); + ASTNode *it_decl = ast_create(NODE_VAR_DECL); + it_decl->var_decl.name = xstrdup("__it"); + it_decl->var_decl.type_str = NULL; // inferred + + // obj.iterator() + ASTNode *call_iter = ast_create(NODE_EXPR_CALL); + ASTNode *memb_iter = ast_create(NODE_EXPR_MEMBER); + memb_iter->member.target = obj_expr; + memb_iter->member.field = xstrdup("iterator"); + call_iter->call.callee = memb_iter; + call_iter->call.args = NULL; + call_iter->call.arg_count = 0; + + it_decl->var_decl.init_expr = call_iter; + + // while(true) + ASTNode *while_loop = ast_create(NODE_WHILE); + ASTNode *true_lit = ast_create(NODE_EXPR_LITERAL); + true_lit->literal.type_kind = TOK_INT; // Treated as bool in conditions + true_lit->literal.int_val = 1; + true_lit->literal.string_val = xstrdup("1"); + while_loop->while_stmt.condition = true_lit; + + ASTNode *loop_body = ast_create(NODE_BLOCK); + ASTNode *stmts_head = NULL; + ASTNode *stmts_tail = NULL; + +#define APPEND_STMT(node) \ + if (!stmts_head) \ + { \ + stmts_head = node; \ + stmts_tail = node; \ + } \ + else \ + { \ + stmts_tail->next = node; \ + stmts_tail = node; \ + } + + // var __opt = __it.next(); + ASTNode *opt_decl = ast_create(NODE_VAR_DECL); + opt_decl->var_decl.name = xstrdup("__opt"); + opt_decl->var_decl.type_str = NULL; + + // __it.next() + ASTNode *call_next = ast_create(NODE_EXPR_CALL); + ASTNode *memb_next = ast_create(NODE_EXPR_MEMBER); + ASTNode *it_ref = ast_create(NODE_EXPR_VAR); + it_ref->var_ref.name = xstrdup("__it"); + memb_next->member.target = it_ref; + memb_next->member.field = xstrdup("next"); + call_next->call.callee = memb_next; + + opt_decl->var_decl.init_expr = call_next; + APPEND_STMT(opt_decl); + + // __opt.is_none() + ASTNode *call_is_none = ast_create(NODE_EXPR_CALL); + ASTNode *memb_is_none = ast_create(NODE_EXPR_MEMBER); + ASTNode *opt_ref1 = ast_create(NODE_EXPR_VAR); + opt_ref1->var_ref.name = xstrdup("__opt"); + memb_is_none->member.target = opt_ref1; + memb_is_none->member.field = xstrdup("is_none"); + call_is_none->call.callee = memb_is_none; + + ASTNode *break_stmt = ast_create(NODE_BREAK); + + ASTNode *if_break = ast_create(NODE_IF); + if_break->if_stmt.condition = call_is_none; + if_break->if_stmt.then_body = break_stmt; + APPEND_STMT(if_break); + + // var <user_var> = __opt.unwrap(); + ASTNode *user_var_decl = ast_create(NODE_VAR_DECL); + user_var_decl->var_decl.name = var_name; + user_var_decl->var_decl.type_str = NULL; + + // __opt.unwrap() + ASTNode *call_unwrap = ast_create(NODE_EXPR_CALL); + ASTNode *memb_unwrap = ast_create(NODE_EXPR_MEMBER); + ASTNode *opt_ref2 = ast_create(NODE_EXPR_VAR); + opt_ref2->var_ref.name = xstrdup("__opt"); + memb_unwrap->member.target = opt_ref2; + memb_unwrap->member.field = xstrdup("unwrap"); + call_unwrap->call.callee = memb_unwrap; + + user_var_decl->var_decl.init_expr = call_unwrap; + APPEND_STMT(user_var_decl); + + // User Body enter_scope(ctx); - // Register loop variable so body can see it - add_symbol(ctx, n->for_range.var_name, "int", type_new(TYPE_INT)); + add_symbol(ctx, var_name, NULL, NULL); - // Handle body (brace or single stmt) + ASTNode *user_body_node; if (lexer_peek(l).type == TOK_LBRACE) { - n->for_range.body = parse_block(ctx, l); + user_body_node = parse_block(ctx, l); } else { - n->for_range.body = parse_statement(ctx, l); + ASTNode *stmt = parse_statement(ctx, l); + ASTNode *blk = ast_create(NODE_BLOCK); + blk->block.statements = stmt; + user_body_node = blk; } exit_scope(ctx); - return n; + // Append user body statements to our loop body + APPEND_STMT(user_body_node); + + loop_body->block.statements = stmts_head; + while_loop->while_stmt.body = loop_body; + + // Wrap entire thing in a block to scope _it + ASTNode *outer_block = ast_create(NODE_BLOCK); + it_decl->next = while_loop; + outer_block->block.statements = it_decl; + + return outer_block; } } l->pos = saved_pos; // Restore @@ -2936,11 +3081,11 @@ ASTNode *parse_block(ParserContext *ctx, Lexer *l) } // RAII: Don't warn if type implements Drop (it is used implicitly) - int has_drop = (sym->type_info && sym->type_info->has_drop); + int has_drop = (sym->type_info && sym->type_info->traits.has_drop); if (!has_drop && sym->type_info && sym->type_info->name) { ASTNode *def = find_struct_def(ctx, sym->type_info->name); - if (def && def->type_info && def->type_info->has_drop) + if (def && def->type_info && def->type_info->traits.has_drop) { has_drop = 1; } @@ -2974,6 +3119,51 @@ ASTNode *parse_trait(ParserContext *ctx, Lexer *l) strncpy(name, n.start, n.len); name[n.len] = 0; + // Generics <T> + char **generic_params = NULL; + int generic_count = 0; + if (lexer_peek(l).type == TOK_LANGLE) + { + lexer_next(l); // eat < + generic_params = xmalloc(sizeof(char *) * 8); // simplified + while (1) + { + Token p = lexer_next(l); + if (p.type != TOK_IDENT) + { + zpanic_at(p, "Expected generic parameter name"); + } + generic_params[generic_count] = xmalloc(p.len + 1); + strncpy(generic_params[generic_count], p.start, p.len); + generic_params[generic_count][p.len] = 0; + generic_count++; + + Token sep = lexer_peek(l); + if (sep.type == TOK_COMMA) + { + lexer_next(l); + continue; + } + else if (sep.type == TOK_RANGLE) + { + lexer_next(l); + break; + } + else + { + zpanic_at(sep, "Expected , or > in generic params"); + } + } + } + + if (generic_count > 0) + { + for (int i = 0; i < generic_count; i++) + { + register_generic(ctx, generic_params[i]); + } + } + lexer_next(l); // eat { ASTNode *methods = NULL, *tail = NULL; @@ -2987,11 +3177,6 @@ ASTNode *parse_trait(ParserContext *ctx, Lexer *l) } // Parse method signature: fn name(args...) -> ret; - // Re-use parse_function but stop at semicolon? - // Actually trait methods might have default impls later, but for now just - // signatures. Let's parse full function but body might be empty/null? Or - // simpler: just parse signature manually. - Token ft = lexer_next(l); if (ft.type != TOK_IDENT || strncmp(ft.start, "fn", 2) != 0) { @@ -3049,6 +3234,8 @@ ASTNode *parse_trait(ParserContext *ctx, Lexer *l) ASTNode *n_node = ast_create(NODE_TRAIT); n_node->trait.name = name; n_node->trait.methods = methods; + n_node->trait.generic_params = generic_params; + n_node->trait.generic_param_count = generic_count; register_trait(name); return n_node; } @@ -3097,7 +3284,26 @@ ASTNode *parse_impl(ParserContext *ctx, Lexer *l) Symbol *s = find_symbol_entry(ctx, name2); if (s && s->type_info) { - s->type_info->has_drop = 1; + s->type_info->traits.has_drop = 1; + } + else + { + // Try finding struct definition + ASTNode *def = find_struct_def(ctx, name2); + if (def && def->type_info) + { + def->type_info->traits.has_drop = 1; + } + } + } + + // Iterator: Check for "Iterable" trait implementation + else if (strcmp(name1, "Iterable") == 0) + { + Symbol *s = find_symbol_entry(ctx, name2); + if (s && s->type_info) + { + s->type_info->traits.has_iterable = 1; } else { @@ -3105,7 +3311,7 @@ ASTNode *parse_impl(ParserContext *ctx, Lexer *l) ASTNode *def = find_struct_def(ctx, name2); if (def && def->type_info) { - def->type_info->has_drop = 1; + def->type_info->traits.has_iterable = 1; } } } diff --git a/src/parser/parser_utils.c b/src/parser/parser_utils.c index df55d16..29407b5 100644 --- a/src/parser/parser_utils.c +++ b/src/parser/parser_utils.c @@ -1857,30 +1857,38 @@ void instantiate_methods(ParserContext *ctx, GenericImplTemplate *it, } // Handle generic return types in methods (e.g., Option<T> -> Option_int) - if (meth->func.ret_type && strchr(meth->func.ret_type, '_')) + if (meth->func.ret_type && + (strchr(meth->func.ret_type, '_') || strchr(meth->func.ret_type, '<'))) { - char *ret_copy = xstrdup(meth->func.ret_type); - char *underscore = strrchr(ret_copy, '_'); - if (underscore && underscore > ret_copy) - { - *underscore = '\0'; - char *template_name = ret_copy; + GenericTemplate *gt = ctx->templates; - // Check if this looks like a generic (e.g., "Option_V" or "Result_V") - GenericTemplate *gt = ctx->templates; - while (gt) + while (gt) + { + size_t tlen = strlen(gt->name); + char delim = meth->func.ret_type[tlen]; + if (strncmp(meth->func.ret_type, gt->name, tlen) == 0 && + (delim == '_' || delim == '<')) { - if (strcmp(gt->name, template_name) == 0) + // Found matching template prefix + const char *arg = meth->func.ret_type + tlen + 1; + + // Simple approach: instantiate 'Template' with 'Arg'. + // If delimited by <, we need to extract the inside. + char *clean_arg = xstrdup(arg); + if (delim == '<') { - // Found matching template, instantiate it - const char *subst_arg = unmangled_arg ? unmangled_arg : arg; - instantiate_generic(ctx, template_name, arg, subst_arg, meth->token); - break; + char *closer = strrchr(clean_arg, '>'); + if (closer) + { + *closer = 0; + } } - gt = gt->next; + + instantiate_generic(ctx, gt->name, clean_arg, clean_arg, meth->token); + free(clean_arg); } + gt = gt->next; } - free(ret_copy); } meth = meth->next; |
