Working on reducing function pollution

author: Zuhaitz Méndez Fernández de Aránguiz <zuhaitz@debian> 2026-01-16 00:19:37 +0000
committer: Zuhaitz Méndez Fernández de Aránguiz <zuhaitz@debian> 2026-01-16 00:19:37 +0000
commit: 23d18925df02157e9330c3612992e40553bb5da1 (patch)
tree: 89a893944aa7555c59b7700aa608f39680c0a120 /src/lexer/token.c
parent: 301d9582884ec7d180791e5c9c6ec649dc01ff68 (diff)
1 file changed, 315 insertions, 415 deletions
diff --git a/src/lexer/token.c b/src/lexer/token.c
index cd662d9..01e414f 100644
--- a/src/lexer/token.c
+++ b/src/lexer/token.c
@@ -1,453 +1,353 @@
 
 #include "zprep.h"
 
-void lexer_init(Lexer *l, const char *src)
-{
-    l->src = src;
-    l->pos = 0;
-    l->line = 1;
-    l->col = 1;
+void lexer_init(Lexer *l, const char *src) {
+  l->src = src;
+  l->pos = 0;
+  l->line = 1;
+  l->col = 1;
 }
 
-static int is_ident_start(char c)
-{
-    return isalpha(c) || c == '_';
-}
+static int is_ident_start(char c) { return isalpha(c) || c == '_'; }
 
-static int is_ident_char(char c)
-{
-    return isalnum(c) || c == '_';
-}
+static int is_ident_char(char c) { return isalnum(c) || c == '_'; }
 
-Token lexer_next(Lexer *l)
-{
-    const char *s = l->src + l->pos;
-    int start_line = l->line;
-    int start_col = l->col;
-
-    while (isspace(*s))
-    {
-        if (*s == '\n')
-        {
-            l->line++;
-            l->col = 1;
-        }
-        else
-        {
-            l->col++;
-        }
-        l->pos++;
-        s++;
-        start_line = l->line;
-        start_col = l->col;
-    }
-
-    // Check for EOF.
-    if (!*s)
-    {
-        return (Token){TOK_EOF, s, 0, start_line, start_col};
-    }
-
-    // C preprocessor directives.
-    if (*s == '#')
-    {
-        int len = 0;
-        while (s[len] && s[len] != '\n')
-        {
-            if (s[len] == '\\' && s[len + 1] == '\n')
-            {
-                len += 2;
-                l->line++;
-            }
-            else
-            {
-                len++;
-            }
-        }
-        l->pos += len;
+Token lexer_next(Lexer *l) {
+  const char *s = l->src + l->pos;
+  int start_line = l->line;
+  int start_col = l->col;
 
-        return (Token){TOK_PREPROC, s, len, start_line, start_col};
+  while (isspace(*s)) {
+    if (*s == '\n') {
+      l->line++;
+      l->col = 1;
+    } else {
+      l->col++;
     }
-
-    // Comments.
-    if (s[0] == '/' && s[1] == '/')
-    {
-        int len = 2;
-        while (s[len] && s[len] != '\n')
-        {
-            len++;
-        }
-        l->pos += len;
-        l->col += len;
-        return lexer_next(l);
+    l->pos++;
+    s++;
+    start_line = l->line;
+    start_col = l->col;
+  }
+
+  // Check for EOF.
+  if (!*s) {
+    return (Token){TOK_EOF, s, 0, start_line, start_col};
+  }
+
+  // C preprocessor directives.
+  if (*s == '#') {
+    int len = 0;
+    while (s[len] && s[len] != '\n') {
+      if (s[len] == '\\' && s[len + 1] == '\n') {
+        len += 2;
+        l->line++;
+      } else {
+        len++;
+      }
     }
+    l->pos += len;
 
-    // Block Comments.
-    if (s[0] == '/' && s[1] == '*')
-    {
-        // skip two start chars
+    return (Token){TOK_PREPROC, s, len, start_line, start_col};
+  }
+
+  // Comments.
+  if (s[0] == '/' && s[1] == '/') {
+    int len = 2;
+    while (s[len] && s[len] != '\n') {
+      len++;
+    }
+    l->pos += len;
+    l->col += len;
+    return lexer_next(l);
+  }
+
+  // Block Comments.
+  if (s[0] == '/' && s[1] == '*') {
+    // skip two start chars
+    l->pos += 2;
+    s += 2;
+
+    while (s[0]) {
+      // s[0] is non-NUL here, so s[1] is at worst the NUL terminator
+      if (s[0] == '*' && s[1] == '/') {
+        // go over */
         l->pos += 2;
         s += 2;
-
-        while (s[0])
-        {
-            // s[len+1] can be at most the null terminator
-            if (s[0] == '*' && s[1] == '/')
-            {
-                // go over */
-                l->pos += 2;
-                s += 2;
-                break;
-            }
-
-            if (s[0] == '\n')
-            {
-                l->line++;
-                l->col = 1;
-            }
-            else
-            {
-                l->col++;
-            }
-
-            l->pos++;
-            s++;
-        }
-
-        return lexer_next(l);
+        break;
+      }
+
+      if (s[0] == '\n') {
+        l->line++;
+        l->col = 1;
+      } else {
+        l->col++;
+      }
+
+      l->pos++;
+      s++;
     }
 
-    // Identifiers.
-    if (is_ident_start(*s))
-    {
-        int len = 0;
-        while (is_ident_char(s[len]))
-        {
-            len++;
-        }
+    return lexer_next(l);
+  }
 
-        l->pos += len;
-        l->col += len;
+  // Identifiers.
+  if (is_ident_start(*s)) {
+    int len = 0;
+    while (is_ident_char(s[len])) {
+      len++;
+    }
 
-        if (len == 4 && strncmp(s, "test", 4) == 0)
-        {
-            return (Token){TOK_TEST, s, 4, start_line, start_col};
-        }
-        if (len == 6 && strncmp(s, "assert", 6) == 0)
-        {
-            return (Token){TOK_ASSERT, s, 6, start_line, start_col};
-        }
-        if (len == 6 && strncmp(s, "sizeof", 6) == 0)
-        {
-            return (Token){TOK_SIZEOF, s, 6, start_line, start_col};
-        }
-        if (len == 5 && strncmp(s, "defer", 5) == 0)
-        {
-            return (Token){TOK_DEFER, s, 5, start_line, start_col};
-        }
-        if (len == 8 && strncmp(s, "autofree", 8) == 0)
-        {
-            return (Token){TOK_AUTOFREE, s, 8, start_line, start_col};
-        }
-        if (len == 3 && strncmp(s, "use", 3) == 0)
-        {
-            return (Token){TOK_USE, s, 3, start_line, start_col};
-        }
-        if (len == 8 && strncmp(s, "comptime", 8) == 0)
-        {
-            return (Token){TOK_COMPTIME, s, 8, start_line, start_col};
-        }
-        if (len == 5 && strncmp(s, "union", 5) == 0)
-        {
-            return (Token){TOK_UNION, s, 5, start_line, start_col};
-        }
-        if (len == 3 && strncmp(s, "asm", 3) == 0)
-        {
-            return (Token){TOK_ASM, s, 3, start_line, start_col};
-        }
-        if (len == 8 && strncmp(s, "volatile", 8) == 0)
-        {
-            return (Token){TOK_VOLATILE, s, 8, start_line, start_col};
-        }
-        if (len == 3 && strncmp(s, "mut", 3) == 0)
-        {
-            return (Token){TOK_MUT, s, 3, start_line, start_col};
-        }
-        if (len == 5 && strncmp(s, "async", 5) == 0)
-        {
-            return (Token){TOK_ASYNC, s, 5, start_line, start_col};
-        }
-        if (len == 5 && strncmp(s, "await", 5) == 0)
-        {
-            return (Token){TOK_AWAIT, s, 5, start_line, start_col};
-        }
-        if (len == 3 && strncmp(s, "and", 3) == 0)
-        {
-            return (Token){TOK_AND, s, 3, start_line, start_col};
-        }
-        if (len == 2 && strncmp(s, "or", 2) == 0)
-        {
-            return (Token){TOK_OR, s, 2, start_line, start_col};
-        }
+    l->pos += len;
+    l->col += len;
 
-        // F-Strings
-        if (len == 1 && s[0] == 'f' && s[1] == '"')
-        {
-            // Reset pos/col because we want to parse string
-            l->pos -= len;
-            l->col -= len;
-        }
-        else
-        {
-            return (Token){TOK_IDENT, s, len, start_line, start_col};
-        }
+    if (len == 4 && strncmp(s, "test", 4) == 0) {
+      return (Token){TOK_TEST, s, 4, start_line, start_col};
     }
-
-    if (s[0] == 'f' && s[1] == '"')
-    {
-        int len = 2;
-        while (s[len] && s[len] != '"')
-        {
-            if (s[len] == '\\')
-            {
-                len++;
-            }
-            len++;
-        }
-        if (s[len] == '"')
-        {
-            len++;
-        }
-        l->pos += len;
-        l->col += len;
-        return (Token){TOK_FSTRING, s, len, start_line, start_col};
-    }
-
-    // Numbers
-    if (isdigit(*s))
-    {
-        int len = 0;
-        if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
-        {
-            len = 2;
-            while (isxdigit(s[len]))
-            {
-                len++;
-            }
-        }
-        else if (s[0] == '0' && (s[1] == 'b' || s[1] == 'B'))
-        {
-            len = 2;
-            while (s[len] == '0' || s[len] == '1')
-            {
-                len++;
-            }
-        }
-        else
-        {
-            while (isdigit(s[len]))
-            {
-                len++;
-            }
-            if (s[len] == '.')
-            {
-                if (s[len + 1] != '.')
-                {
-                    len++;
-                    while (isdigit(s[len]))
-                    {
-                        len++;
-                    }
-                    l->pos += len;
-                    l->col += len;
-                    return (Token){TOK_FLOAT, s, len, start_line, start_col};
-                }
-            }
-        }
-        l->pos += len;
-        l->col += len;
-        return (Token){TOK_INT, s, len, start_line, start_col};
-    }
-
-    // Strings
-    if (*s == '"')
-    {
-        int len = 1;
-        while (s[len] && s[len] != '"')
-        {
-            if (s[len] == '\\')
-            {
-                len++;
-            }
-            len++;
-        }
-        if (s[len] == '"')
-        {
-            len++;
-        }
-        l->pos += len;
-        l->col += len;
-        return (Token){TOK_STRING, s, len, start_line, start_col};
+    if (len == 6 && strncmp(s, "assert", 6) == 0) {
+      return (Token){TOK_ASSERT, s, 6, start_line, start_col};
+    }
+    if (len == 6 && strncmp(s, "sizeof", 6) == 0) {
+      return (Token){TOK_SIZEOF, s, 6, start_line, start_col};
+    }
+    if (len == 5 && strncmp(s, "defer", 5) == 0) {
+      return (Token){TOK_DEFER, s, 5, start_line, start_col};
+    }
+    if (len == 8 && strncmp(s, "autofree", 8) == 0) {
+      return (Token){TOK_AUTOFREE, s, 8, start_line, start_col};
+    }
+    if (len == 3 && strncmp(s, "use", 3) == 0) {
+      return (Token){TOK_USE, s, 3, start_line, start_col};
+    }
+    if (len == 8 && strncmp(s, "comptime", 8) == 0) {
+      return (Token){TOK_COMPTIME, s, 8, start_line, start_col};
+    }
+    if (len == 5 && strncmp(s, "union", 5) == 0) {
+      return (Token){TOK_UNION, s, 5, start_line, start_col};
+    }
+    if (len == 3 && strncmp(s, "asm", 3) == 0) {
+      return (Token){TOK_ASM, s, 3, start_line, start_col};
+    }
+    if (len == 8 && strncmp(s, "volatile", 8) == 0) {
+      return (Token){TOK_VOLATILE, s, 8, start_line, start_col};
+    }
+    if (len == 3 && strncmp(s, "mut", 3) == 0) {
+      return (Token){TOK_MUT, s, 3, start_line, start_col};
+    }
+    if (len == 5 && strncmp(s, "async", 5) == 0) {
+      return (Token){TOK_ASYNC, s, 5, start_line, start_col};
+    }
+    if (len == 5 && strncmp(s, "await", 5) == 0) {
+      return (Token){TOK_AWAIT, s, 5, start_line, start_col};
+    }
+    if (len == 3 && strncmp(s, "and", 3) == 0) {
+      return (Token){TOK_AND, s, 3, start_line, start_col};
+    }
+    if (len == 2 && strncmp(s, "or", 2) == 0) {
+      return (Token){TOK_OR, s, 2, start_line, start_col};
     }
 
-    if (*s == '\'')
-    {
-        int len = 1;
-        // Handle escapes like '\n' or regular 'a'
-        if (s[len] == '\\')
-        {
-            len++;
-            len++;
-        }
-        else
-        {
-            len++;
-        }
-        if (s[len] == '\'')
-        {
+    // F-Strings
+    if (len == 1 && s[0] == 'f' && s[1] == '"') {
+      // Reset pos/col because we want to parse string
+      l->pos -= len;
+      l->col -= len;
+    } else {
+      return (Token){TOK_IDENT, s, len, start_line, start_col};
+    }
+  }
+
+  if (s[0] == 'f' && s[1] == '"') {
+    int len = 2;
+    while (s[len] && s[len] != '"') {
+      if (s[len] == '\\') {
+        len++;
+      }
+      len++;
+    }
+    if (s[len] == '"') {
+      len++;
+    }
+    l->pos += len;
+    l->col += len;
+    return (Token){TOK_FSTRING, s, len, start_line, start_col};
+  }
+
+  // Numbers
+  if (isdigit(*s)) {
+    int len = 0;
+    if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
+      len = 2;
+      while (isxdigit(s[len])) {
+        len++;
+      }
+    } else if (s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) {
+      len = 2;
+      while (s[len] == '0' || s[len] == '1') {
+        len++;
+      }
+    } else {
+      while (isdigit(s[len])) {
+        len++;
+      }
+      if (s[len] == '.') {
+        if (s[len + 1] != '.') {
+          len++;
+          while (isdigit(s[len])) {
             len++;
+          }
+          l->pos += len;
+          l->col += len;
+          return (Token){TOK_FLOAT, s, len, start_line, start_col};
         }
-
-        l->pos += len;
-        l->col += len;
-        return (Token){TOK_CHAR, s, len, start_line, start_col};
+      }
     }
+    l->pos += len;
+    l->col += len;
+    return (Token){TOK_INT, s, len, start_line, start_col};
+  }
 
-    // Operators.
+  // Strings
+  if (*s == '"') {
     int len = 1;
-    TokenType type = TOK_OP;
-
-    if (s[0] == '?' && s[1] == '.')
-    {
-        len = 2;
-        type = TOK_Q_DOT;
-    }
-    else if (s[0] == '?' && s[1] == '?')
-    {
-        if (s[2] == '=')
-        {
-            len = 3;
-            type = TOK_QQ_EQ;
-        }
-        else
-        {
-            len = 2;
-            type = TOK_QQ;
-        }
-    }
-    else if (*s == '?')
-    {
-        type = TOK_QUESTION;
-    }
-    else if (s[0] == '|' && s[1] == '>')
-    {
-        len = 2;
-        type = TOK_PIPE;
-    }
-    else if (s[0] == ':' && s[1] == ':')
-    {
-        len = 2;
-        type = TOK_DCOLON;
-    }
-    else if (s[0] == '.' && s[1] == '.' && s[2] == '.')
-    {
-        len = 3;
-        type = TOK_ELLIPSIS;
-    }
-    else if (s[0] == '.' && s[1] == '.')
-    {
-        len = 2;
-        type = TOK_DOTDOT;
-    }
-    else if ((s[0] == '-' && s[1] == '>') || (s[0] == '=' && s[1] == '>'))
-    {
-        len = 2;
-        type = TOK_ARROW;
-    }
-
-    else if ((s[0] == '<' && s[1] == '<') || (s[0] == '>' && s[1] == '>'))
-    {
-        len = 2;
-        if (s[2] == '=')
-        {
-            len = 3; // Handle <<= and >>=
-        }
+    while (s[len] && s[len] != '"') {
+      if (s[len] == '\\') {
+        len++;
+      }
+      len++;
     }
-    else if ((s[0] == '&' && s[1] == '&') || (s[0] == '|' && s[1] == '|') ||
-             (s[0] == '+' && s[1] == '+') || (s[0] == '-' && s[1] == '-'))
-    {
-        len = 2;
-    }
-    else if (s[1] == '=')
-    {
-        // This catches: == != <= >= += -= *= /= %= |= &= ^=
-        if (strchr("=!<>+-*/%|&^", s[0]))
-        {
-            len = 2;
-        }
+    if (s[len] == '"') {
+      len++;
     }
+    l->pos += len;
+    l->col += len;
+    return (Token){TOK_STRING, s, len, start_line, start_col};
+  }
 
-    else
-    {
-        switch (*s)
-        {
-
-        case '(':
-            type = TOK_LPAREN;
-            break;
-        case ')':
-            type = TOK_RPAREN;
-            break;
-        case '{':
-            type = TOK_LBRACE;
-            break;
-        case '}':
-            type = TOK_RBRACE;
-            break;
-        case '[':
-            type = TOK_LBRACKET;
-            break;
-        case ']':
-            type = TOK_RBRACKET;
-            break;
-        case '<':
-            type = TOK_LANGLE;
-            break;
-        case '>':
-            type = TOK_RANGLE;
-            break;
-        case ',':
-            type = TOK_COMMA;
-            break;
-        case ':':
-            type = TOK_COLON;
-            break;
-        case ';':
-            type = TOK_SEMICOLON;
-            break;
-        case '@':
-            type = TOK_AT;
-            break;
-        default:
-            type = TOK_OP;
-            break;
-        }
+  if (*s == '\'') {
+    int len = 1;
+    // Handle escapes like '\n' or regular 'a'
+    if (s[len] == '\\') {
+      len++;
+      len++;
+    } else {
+      len++;
+    }
+    if (s[len] == '\'') {
+      len++;
     }
 
     l->pos += len;
     l->col += len;
-    return (Token){type, s, len, start_line, start_col};
+    return (Token){TOK_CHAR, s, len, start_line, start_col};
+  }
+
+  // Operators.
+  int len = 1;
+  TokenType type = TOK_OP;
+
+  if (s[0] == '?' && s[1] == '.') {
+    len = 2;
+    type = TOK_Q_DOT;
+  } else if (s[0] == '?' && s[1] == '?') {
+    if (s[2] == '=') {
+      len = 3;
+      type = TOK_QQ_EQ;
+    } else {
+      len = 2;
+      type = TOK_QQ;
+    }
+  } else if (*s == '?') {
+    type = TOK_QUESTION;
+  } else if (s[0] == '|' && s[1] == '>') {
+    len = 2;
+    type = TOK_PIPE;
+  } else if (s[0] == ':' && s[1] == ':') {
+    len = 2;
+    type = TOK_DCOLON;
+  } else if (s[0] == '.' && s[1] == '.' && s[2] == '.') {
+    len = 3;
+    type = TOK_ELLIPSIS;
+  } else if (s[0] == '.' && s[1] == '.') {
+    len = 2;
+    type = TOK_DOTDOT;
+  } else if ((s[0] == '-' && s[1] == '>') || (s[0] == '=' && s[1] == '>')) {
+    len = 2;
+    type = TOK_ARROW;
+  }
+
+  else if ((s[0] == '<' && s[1] == '<') || (s[0] == '>' && s[1] == '>')) {
+    len = 2;
+    if (s[2] == '=') {
+      len = 3; // Handle <<= and >>=
+    }
+  } else if ((s[0] == '&' && s[1] == '&') || (s[0] == '|' && s[1] == '|') ||
+             (s[0] == '+' && s[1] == '+') || (s[0] == '-' && s[1] == '-')) {
+    len = 2;
+  } else if (s[1] == '=') {
+    // This catches: == != <= >= += -= *= /= %= |= &= ^=
+    if (strchr("=!<>+-*/%|&^", s[0])) {
+      len = 2;
+    }
+  }
+
+  else {
+    switch (*s) {
+
+    case '(':
+      type = TOK_LPAREN;
+      break;
+    case ')':
+      type = TOK_RPAREN;
+      break;
+    case '{':
+      type = TOK_LBRACE;
+      break;
+    case '}':
+      type = TOK_RBRACE;
+      break;
+    case '[':
+      type = TOK_LBRACKET;
+      break;
+    case ']':
+      type = TOK_RBRACKET;
+      break;
+    case '<':
+      type = TOK_LANGLE;
+      break;
+    case '>':
+      type = TOK_RANGLE;
+      break;
+    case ',':
+      type = TOK_COMMA;
+      break;
+    case ':':
+      type = TOK_COLON;
+      break;
+    case ';':
+      type = TOK_SEMICOLON;
+      break;
+    case '@':
+      type = TOK_AT;
+      break;
+    default:
+      type = TOK_OP;
+      break;
+    }
+  }
+
+  l->pos += len;
+  l->col += len;
+  return (Token){type, s, len, start_line, start_col};
 }
 
-Token lexer_peek(Lexer *l)
-{
-    Lexer saved = *l;
-    return lexer_next(&saved);
+Token lexer_peek(Lexer *l) {
+  Lexer saved = *l;
+  return lexer_next(&saved);
 }
 
-Token lexer_peek2(Lexer *l)
-{
-    Lexer saved = *l;
-    lexer_next(&saved);
-    return lexer_next(&saved);
+Token lexer_peek2(Lexer *l) {
+  Lexer saved = *l;
+  lexer_next(&saved);
+  return lexer_next(&saved);
 }
author	Zuhaitz Méndez Fernández de Aránguiz <zuhaitz@debian>	2026-01-16 00:19:37 +0000
committer	Zuhaitz Méndez Fernández de Aránguiz <zuhaitz@debian>	2026-01-16 00:19:37 +0000
commit	23d18925df02157e9330c3612992e40553bb5da1 (patch)
tree	89a893944aa7555c59b7700aa608f39680c0a120 /src/lexer/token.c
parent	301d9582884ec7d180791e5c9c6ec649dc01ff68 (diff)