#include "zprep.h" void lexer_init(Lexer *l, const char *src) { l->src = src; l->pos = 0; l->line = 1; l->col = 1; } static int is_ident_start(char c) { return isalpha(c) || c == '_'; } static int is_ident_char(char c) { return isalnum(c) || c == '_'; } Token lexer_next(Lexer *l) { const char *s = l->src + l->pos; int start_line = l->line; int start_col = l->col; while (isspace(*s)) { if (*s == '\n') { l->line++; l->col = 1; } else { l->col++; } l->pos++; s++; start_line = l->line; start_col = l->col; } // Check for EOF. if (!*s) { return (Token){TOK_EOF, s, 0, start_line, start_col}; } // C preprocessor directives. if (*s == '#') { int len = 0; while (s[len] && s[len] != '\n') { if (s[len] == '\\' && s[len + 1] == '\n') { len += 2; l->line++; } else { len++; } } l->pos += len; return (Token){TOK_PREPROC, s, len, start_line, start_col}; } // Comments. if (s[0] == '/' && s[1] == '/') { int len = 2; while (s[len] && s[len] != '\n') { len++; } l->pos += len; l->col += len; return lexer_next(l); } // Block Comments. if (s[0] == '/' && s[1] == '*') { // skip two start chars l->pos += 2; s += 2; while (s[0]) { // s[len+1] can be at most the null terminator if (s[0] == '*' && s[1] == '/') { // go over */ l->pos += 2; s += 2; break; } if (s[0] == '\n') { l->line++; l->col = 1; } else { l->col++; } l->pos++; s++; } return lexer_next(l); } // Identifiers. if (is_ident_start(*s)) { int len = 0; while (is_ident_char(s[len])) { len++; } l->pos += len; l->col += len; if (len == 4 && strncmp(s, "test", 4) == 0) { return (Token){TOK_TEST, s, 4, start_line, start_col}; } if (len == 6 && strncmp(s, "assert", 6) == 0) { return (Token){TOK_ASSERT, s, 6, start_line, start_col}; } if (len == 6 && strncmp(s, "sizeof", 6) == 0) { return (Token){TOK_SIZEOF, s, 6, start_line, start_col}; } if (len == 5 && strncmp(s, "defer", 5) == 0) { return (Token){TOK_DEFER, s, 5, start_line, start_col}; } if (len == 3 && strncmp(s, "def", 3) == 0) { return (Token){TOK_DEF, s, 3, start_line, start_col}; } if (len == 8 && strncmp(s, "autofree", 8) == 0) { return (Token){TOK_AUTOFREE, s, 8, start_line, start_col}; } if (len == 5 && strncmp(s, "alias", 5) == 0) { return (Token){TOK_ALIAS, s, 5, start_line, start_col}; } if (len == 3 && strncmp(s, "use", 3) == 0) { return (Token){TOK_USE, s, 3, start_line, start_col}; } if (len == 8 && strncmp(s, "comptime", 8) == 0) { return (Token){TOK_COMPTIME, s, 8, start_line, start_col}; } if (len == 5 && strncmp(s, "union", 5) == 0) { return (Token){TOK_UNION, s, 5, start_line, start_col}; } if (len == 3 && strncmp(s, "asm", 3) == 0) { return (Token){TOK_ASM, s, 3, start_line, start_col}; } if (len == 8 && strncmp(s, "volatile", 8) == 0) { return (Token){TOK_VOLATILE, s, 8, start_line, start_col}; } if (len == 5 && strncmp(s, "async", 5) == 0) { return (Token){TOK_ASYNC, s, 5, start_line, start_col}; } if (len == 5 && strncmp(s, "await", 5) == 0) { return (Token){TOK_AWAIT, s, 5, start_line, start_col}; } if (len == 3 && strncmp(s, "and", 3) == 0) { return (Token){TOK_AND, s, 3, start_line, start_col}; } if (len == 2 && strncmp(s, "or", 2) == 0) { return (Token){TOK_OR, s, 2, start_line, start_col}; } // F-Strings if (len == 1 && s[0] == 'f' && s[1] == '"') { // Reset pos/col because we want to parse string l->pos -= len; l->col -= len; } else { return (Token){TOK_IDENT, s, len, start_line, start_col}; } } if (s[0] == 'f' && s[1] == '"') { int len = 2; while (s[len] && s[len] != '"') { if (s[len] == '\\') { len++; } len++; } if (s[len] == '"') { len++; } l->pos += len; l->col += len; return 
(Token){TOK_FSTRING, s, len, start_line, start_col}; } // Numbers if (isdigit(*s)) { int len = 0; if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { len = 2; while (isxdigit(s[len])) { len++; } } else if (s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) { len = 2; while (s[len] == '0' || s[len] == '1') { len++; } } else { while (isdigit(s[len])) { len++; } if (s[len] == '.') { if (s[len + 1] != '.') { len++; while (isdigit(s[len])) { len++; } // Consume float suffix (e.g. 1.0f) if (is_ident_start(s[len])) { while (is_ident_char(s[len])) { len++; } } l->pos += len; l->col += len; return (Token){TOK_FLOAT, s, len, start_line, start_col}; } } } // Consume integer suffix (e.g. 1u, 100u64, 1L) if (is_ident_start(s[len])) { while (is_ident_char(s[len])) { len++; } } l->pos += len; l->col += len; return (Token){TOK_INT, s, len, start_line, start_col}; } // Strings if (*s == '"') { int len = 1; while (s[len] && s[len] != '"') { if (s[len] == '\\') { len++; } len++; } if (s[len] == '"') { len++; } l->pos += len; l->col += len; return (Token){TOK_STRING, s, len, start_line, start_col}; } if (*s == '\'') { int len = 1; // Handle escapes like '\n' or regular 'a' if (s[len] == '\\') { len++; len++; } else { len++; } if (s[len] == '\'') { len++; } l->pos += len; l->col += len; return (Token){TOK_CHAR, s, len, start_line, start_col}; } // Operators. int len = 1; TokenType type = TOK_OP; if (s[0] == '?' && s[1] == '.') { len = 2; type = TOK_Q_DOT; } else if (s[0] == '?' && s[1] == '?') { if (s[2] == '=') { len = 3; type = TOK_QQ_EQ; } else { len = 2; type = TOK_QQ; } } else if (*s == '?') { type = TOK_QUESTION; } else if (s[0] == '|' && s[1] == '>') { len = 2; type = TOK_PIPE; } else if (s[0] == ':' && s[1] == ':') { len = 2; type = TOK_DCOLON; } else if (s[0] == '.' && s[1] == '.' && s[2] == '.') { len = 3; type = TOK_ELLIPSIS; } else if (s[0] == '.' && s[1] == '.') { if (s[2] == '=') { len = 3; type = TOK_DOTDOT_EQ; } else if (s[2] == '<') { len = 3; type = TOK_DOTDOT_LT; } else { len = 2; type = TOK_DOTDOT; } } else if ((s[0] == '-' && s[1] == '>') || (s[0] == '=' && s[1] == '>')) { len = 2; type = TOK_ARROW; } else if ((s[0] == '<' && s[1] == '<') || (s[0] == '>' && s[1] == '>')) { len = 2; if (s[2] == '=') { len = 3; // Handle <<= and >>= } } else if ((s[0] == '&' && s[1] == '&') || (s[0] == '|' && s[1] == '|') || (s[0] == '+' && s[1] == '+') || (s[0] == '-' && s[1] == '-')) { len = 2; } else if (s[1] == '=') { // This catches: == != <= >= += -= *= /= %= |= &= ^= if (strchr("=!<>+-*/%|&^", s[0])) { len = 2; } } else { switch (*s) { case '(': type = TOK_LPAREN; break; case ')': type = TOK_RPAREN; break; case '{': type = TOK_LBRACE; break; case '}': type = TOK_RBRACE; break; case '[': type = TOK_LBRACKET; break; case ']': type = TOK_RBRACKET; break; case '<': type = TOK_LANGLE; break; case '>': type = TOK_RANGLE; break; case ',': type = TOK_COMMA; break; case ':': type = TOK_COLON; break; case ';': type = TOK_SEMICOLON; break; case '@': type = TOK_AT; break; default: type = TOK_OP; break; } } l->pos += len; l->col += len; return (Token){type, s, len, start_line, start_col}; } Token lexer_peek(Lexer *l) { Lexer saved = *l; return lexer_next(&saved); } Token lexer_peek2(Lexer *l) { Lexer saved = *l; lexer_next(&saved); return lexer_next(&saved); }
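
/*
 * Usage sketch (an assumption-laden demo, not part of the original file):
 * compiling this translation unit with -DZPREP_LEXER_DEMO tokenizes a small
 * source string and prints each token until TOK_EOF. The ZPREP_LEXER_DEMO
 * guard is hypothetical, and the Token field names used here (type, start,
 * len, line, col) are only inferred from the compound-literal initializers
 * above; adjust them to match the actual declarations in zprep.h.
 */
#ifdef ZPREP_LEXER_DEMO
#include <stdio.h>

int main(void) {
    Lexer l;
    lexer_init(&l, "def add(a, b) -> int { return a + b; } // demo");
    for (;;) {
        Token t = lexer_next(&l);  // consume one token
        if (t.type == TOK_EOF) {
            break;
        }
        // Print position, numeric token kind, and the raw source slice.
        printf("%d:%d kind=%d '%.*s'\n", t.line, t.col, (int)t.type, t.len, t.start);
    }
    return 0;
}
#endif /* ZPREP_LEXER_DEMO */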