summaryrefslogtreecommitdiff
path: root/src/lexer/token.c
diff options
context:
space:
mode:
authorZuhaitz Méndez Fernández de Aránguiz <zuhaitz@debian>2026-01-11 15:11:00 +0000
committerZuhaitz Méndez Fernández de Aránguiz <zuhaitz@debian>2026-01-11 15:11:00 +0000
commit55247a3f12a9eee7ba3fd7ca6d8fcea7a82c20f3 (patch)
treea2a71e2eb8ca0b2c483518c1902d89d18709c9ab /src/lexer/token.c
parent2e7abed7cfe84a2c0df371cde35f8f68cfdca16c (diff)
Added src/ folder. Now I will add the rest.
Diffstat (limited to 'src/lexer/token.c')
-rw-r--r--src/lexer/token.c414
1 files changed, 414 insertions, 0 deletions
diff --git a/src/lexer/token.c b/src/lexer/token.c
new file mode 100644
index 0000000..ebed001
--- /dev/null
+++ b/src/lexer/token.c
@@ -0,0 +1,414 @@
+
+#include "zprep.h"
+
+void lexer_init(Lexer *l, const char *src)
+{
+ l->src = src;
+ l->pos = 0;
+ l->line = 1;
+ l->col = 1;
+}
+
/* True if `c` may begin an identifier: an ASCII letter or '_'. */
static int is_ident_start(char c)
{
    /* Cast to unsigned char: passing a negative plain char to isalpha()
     * is undefined behavior (CERT STR37-C). */
    return isalpha((unsigned char)c) || c == '_';
}
+
/* True if `c` may continue an identifier: an ASCII letter, digit, or '_'. */
static int is_ident_char(char c)
{
    /* Cast to unsigned char: passing a negative plain char to isalnum()
     * is undefined behavior (CERT STR37-C). */
    return isalnum((unsigned char)c) || c == '_';
}
+
+Token lexer_next(Lexer *l)
+{
+ const char *s = l->src + l->pos;
+ int start_line = l->line;
+ int start_col = l->col;
+
+ while (isspace(*s))
+ {
+ if (*s == '\n')
+ {
+ l->line++;
+ l->col = 1;
+ }
+ else
+ {
+ l->col++;
+ }
+ l->pos++;
+ s++;
+ start_line = l->line;
+ start_col = l->col;
+ }
+
+ // Check for EOF.
+ if (!*s)
+ {
+ return (Token){TOK_EOF, s, 0, start_line, start_col};
+ }
+
+ // C preprocessor directives.
+ if (*s == '#')
+ {
+ int len = 0;
+ while (s[len] && s[len] != '\n')
+ {
+ if (s[len] == '\\' && s[len + 1] == '\n')
+ {
+ len += 2;
+ l->line++;
+ }
+ else
+ {
+ len++;
+ }
+ }
+ l->pos += len;
+
+ return (Token){TOK_PREPROC, s, len, start_line, start_col};
+ }
+
+ // Comments.
+ if (s[0] == '/' && s[1] == '/')
+ {
+ int len = 2;
+ while (s[len] && s[len] != '\n')
+ {
+ len++;
+ }
+ l->pos += len;
+ l->col += len;
+ return (Token){TOK_COMMENT, s, len, start_line, start_col};
+ }
+
+ // Identifiers.
+ if (is_ident_start(*s))
+ {
+ int len = 0;
+ while (is_ident_char(s[len]))
+ {
+ len++;
+ }
+
+ l->pos += len;
+ l->col += len;
+
+ if (len == 4 && strncmp(s, "test", 4) == 0)
+ {
+ return (Token){TOK_TEST, s, 4, start_line, start_col};
+ }
+ if (len == 6 && strncmp(s, "assert", 6) == 0)
+ {
+ return (Token){TOK_ASSERT, s, 6, start_line, start_col};
+ }
+ if (len == 6 && strncmp(s, "sizeof", 6) == 0)
+ {
+ return (Token){TOK_SIZEOF, s, 6, start_line, start_col};
+ }
+ if (len == 5 && strncmp(s, "defer", 5) == 0)
+ {
+ return (Token){TOK_DEFER, s, 5, start_line, start_col};
+ }
+ if (len == 8 && strncmp(s, "autofree", 8) == 0)
+ {
+ return (Token){TOK_AUTOFREE, s, 8, start_line, start_col};
+ }
+ if (len == 3 && strncmp(s, "use", 3) == 0)
+ {
+ return (Token){TOK_USE, s, 3, start_line, start_col};
+ }
+ if (len == 3 && strncmp(s, "use", 3) == 0)
+ {
+ return (Token){TOK_USE, s, 3, start_line, start_col};
+ }
+ if (len == 8 && strncmp(s, "comptime", 8) == 0)
+ {
+ return (Token){TOK_COMPTIME, s, 8, start_line, start_col};
+ }
+ if (len == 5 && strncmp(s, "union", 5) == 0)
+ {
+ return (Token){TOK_UNION, s, 5, start_line, start_col};
+ }
+ if (len == 3 && strncmp(s, "asm", 3) == 0)
+ {
+ return (Token){TOK_ASM, s, 3, start_line, start_col};
+ }
+ if (len == 8 && strncmp(s, "volatile", 8) == 0)
+ {
+ return (Token){TOK_VOLATILE, s, 8, start_line, start_col};
+ }
+ if (len == 3 && strncmp(s, "mut", 3) == 0)
+ {
+ return (Token){TOK_MUT, s, 3, start_line, start_col};
+ }
+ if (len == 5 && strncmp(s, "async", 5) == 0)
+ {
+ return (Token){TOK_ASYNC, s, 5, start_line, start_col};
+ }
+ if (len == 5 && strncmp(s, "await", 5) == 0)
+ {
+ return (Token){TOK_AWAIT, s, 5, start_line, start_col};
+ }
+
+ // F-Strings
+ if (len == 1 && s[0] == 'f' && s[1] == '"')
+ {
+ // Reset pos/col because we want to parse string
+ l->pos -= len;
+ l->col -= len;
+ }
+ else
+ {
+ return (Token){TOK_IDENT, s, len, start_line, start_col};
+ }
+ }
+
+ if (s[0] == 'f' && s[1] == '"')
+ {
+ int len = 2;
+ while (s[len] && s[len] != '"')
+ {
+ if (s[len] == '\\')
+ {
+ len++;
+ }
+ len++;
+ }
+ if (s[len] == '"')
+ {
+ len++;
+ }
+ l->pos += len;
+ l->col += len;
+ return (Token){TOK_FSTRING, s, len, start_line, start_col};
+ }
+
+ // Numbers
+ if (isdigit(*s))
+ {
+ int len = 0;
+ if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
+ {
+ len = 2;
+ while (isxdigit(s[len]))
+ {
+ len++;
+ }
+ }
+ else if (s[0] == '0' && (s[1] == 'b' || s[1] == 'B'))
+ {
+ len = 2;
+ while (s[len] == '0' || s[len] == '1')
+ {
+ len++;
+ }
+ }
+ else
+ {
+ while (isdigit(s[len]))
+ {
+ len++;
+ }
+ if (s[len] == '.')
+ {
+ if (s[len + 1] != '.')
+ {
+ len++;
+ while (isdigit(s[len]))
+ {
+ len++;
+ }
+ l->pos += len;
+ l->col += len;
+ return (Token){TOK_FLOAT, s, len, start_line, start_col};
+ }
+ }
+ }
+ l->pos += len;
+ l->col += len;
+ return (Token){TOK_INT, s, len, start_line, start_col};
+ }
+
+ // Strings
+ if (*s == '"')
+ {
+ int len = 1;
+ while (s[len] && s[len] != '"')
+ {
+ if (s[len] == '\\')
+ {
+ len++;
+ }
+ len++;
+ }
+ if (s[len] == '"')
+ {
+ len++;
+ }
+ l->pos += len;
+ l->col += len;
+ return (Token){TOK_STRING, s, len, start_line, start_col};
+ }
+
+ if (*s == '\'')
+ {
+ int len = 1;
+ // Handle escapes like '\n' or regular 'a'
+ if (s[len] == '\\')
+ {
+ len++;
+ len++;
+ }
+ else
+ {
+ len++;
+ }
+ if (s[len] == '\'')
+ {
+ len++;
+ }
+
+ l->pos += len;
+ l->col += len;
+ return (Token){TOK_CHAR, s, len, start_line, start_col};
+ }
+
+ // Operators.
+ int len = 1;
+ TokenType type = TOK_OP;
+
+ if (s[0] == '?' && s[1] == '.')
+ {
+ len = 2;
+ type = TOK_Q_DOT;
+ }
+ else if (s[0] == '?' && s[1] == '?')
+ {
+ if (s[2] == '=')
+ {
+ len = 3;
+ type = TOK_QQ_EQ;
+ }
+ else
+ {
+ len = 2;
+ type = TOK_QQ;
+ }
+ }
+ else if (*s == '?')
+ {
+ type = TOK_QUESTION;
+ }
+ else if (s[0] == '|' && s[1] == '>')
+ {
+ len = 2;
+ type = TOK_PIPE;
+ }
+ else if (s[0] == ':' && s[1] == ':')
+ {
+ len = 2;
+ type = TOK_DCOLON;
+ }
+ else if (s[0] == '.' && s[1] == '.' && s[2] == '.')
+ {
+ len = 3;
+ type = TOK_ELLIPSIS;
+ }
+ else if (s[0] == '.' && s[1] == '.')
+ {
+ len = 2;
+ type = TOK_DOTDOT;
+ }
+ else if ((s[0] == '-' && s[1] == '>') || (s[0] == '=' && s[1] == '>'))
+ {
+ len = 2;
+ type = TOK_ARROW;
+ }
+
+ else if ((s[0] == '<' && s[1] == '<') || (s[0] == '>' && s[1] == '>'))
+ {
+ len = 2;
+ if (s[2] == '=')
+ {
+ len = 3; // Handle <<= and >>=
+ }
+ }
+ else if ((s[0] == '&' && s[1] == '&') || (s[0] == '|' && s[1] == '|') ||
+ (s[0] == '+' && s[1] == '+') || (s[0] == '-' && s[1] == '-'))
+ {
+ len = 2;
+ }
+ else if (s[1] == '=')
+ {
+ // This catches: == != <= >= += -= *= /= %= |= &= ^=
+ if (strchr("=!<>+-*/%|&^", s[0]))
+ {
+ len = 2;
+ }
+ }
+
+ else
+ {
+ switch (*s)
+ {
+
+ case '(':
+ type = TOK_LPAREN;
+ break;
+ case ')':
+ type = TOK_RPAREN;
+ break;
+ case '{':
+ type = TOK_LBRACE;
+ break;
+ case '}':
+ type = TOK_RBRACE;
+ break;
+ case '[':
+ type = TOK_LBRACKET;
+ break;
+ case ']':
+ type = TOK_RBRACKET;
+ break;
+ case '<':
+ type = TOK_LANGLE;
+ break;
+ case '>':
+ type = TOK_RANGLE;
+ break;
+ case ',':
+ type = TOK_COMMA;
+ break;
+ case ':':
+ type = TOK_COLON;
+ break;
+ case ';':
+ type = TOK_SEMICOLON;
+ break;
+ case '@':
+ type = TOK_AT;
+ break;
+ default:
+ type = TOK_OP;
+ break;
+ }
+ }
+
+ l->pos += len;
+ l->col += len;
+ return (Token){type, s, len, start_line, start_col};
+}
+
+Token lexer_peek(Lexer *l)
+{
+ Lexer saved = *l;
+ return lexer_next(&saved);
+}
+
+Token lexer_peek2(Lexer *l)
+{
+ Lexer saved = *l;
+ lexer_next(&saved);
+ return lexer_next(&saved);
+}