3 files changed, 386 insertions, 0 deletions
diff --git a/std.zc b/std.zc
index 4793c11..3dcc45a 100644
--- a/std.zc
+++ b/std.zc
@@ -18,5 +18,6 @@ import "./std/stack.zc"
 import "./std/queue.zc"
 import "./std/env.zc"
 import "./std/slice.zc"
+import "./std/regex.zc"
 import "./std/process.zc"
 
diff --git a/std/regex.zc b/std/regex.zc
new file mode 100644
index 0000000..f64b36e
--- /dev/null
+++ b/std/regex.zc
@@ -0,0 +1,198 @@
+include <regex.h>
+
+import "./core.zc"
+import "./string.zc"
+import "./vec.zc"
+import "./option.zc"
+
+// One match result, as produced by Regex::find.
+struct Match {
+    // Pointer into the searched string (Regex::find stores `text + i` here).
+    // It is not a copy, so it is only usable while the searched string is alive.
+    text: char*;
+    // Offset value recorded by whoever built the match.
+    start: int;
+    // Length value recorded by whoever built the match.
+    len: int;
+}
+
+impl Match {
+    // Packs the three values into a Match. Nothing is copied or validated.
+    fn new(text: char*, start: int, len: int) -> Match {
+        let m = Match { text: text, start: start, len: len };
+        return m;
+    }
+
+    // Returns the stored `text` pointer unchanged.
+    // NOTE(review): the pointer is not truncated to `len`, so the C string it
+    // yields runs to the end of the searched text — confirm this is intended.
+    fn as_string(self) -> char* {
+        let s = self.text;
+        return s;
+    }
+
+    // Returns the offset one past the match, i.e. start + len.
+    fn end(self) -> int {
+        let stop = self.len + self.start;
+        return stop;
+    }
+}
+
+// Handle for a regex compiled through the <regex.h> functions.
+struct Regex {
+    // Buffer handed to regcomp/regexec/regfree; 0 when compilation failed.
+    preg: void*;
+    // The pattern pointer given at compile time, stored as-is (not copied);
+    // 0 when compilation failed.
+    pattern: char*;
+    // The flags value that was passed to regcomp.
+    flags: int;
+}
+
+impl Regex {
+    // Compiles `pattern` with the default flag value 1 | 2.
+    // NOTE(review): 1 and 2 are presumably REG_EXTENDED and REG_ICASE (their
+    // glibc values); the numbers are platform-defined, so confirm, and confirm
+    // that case-insensitive matching by default is intended.
+    // The returned Regex may be invalid; check is_valid() before relying on it.
+    fn compile(pattern: char*) -> Regex {
+        let default_flags = 1 | 2;
+        return Regex::compile_with_flags(pattern, default_flags);
+    }
+
+    // Compiles `pattern` with the given regcomp flags.
+    //
+    // Returns a Regex whose `preg` is 0 (so is_valid() is false) when the
+    // pattern pointer is null, when the buffer cannot be allocated, or when
+    // regcomp reports an error. On success the Regex keeps the caller's
+    // `pattern` pointer without copying it, and must be released with destroy().
+    fn compile_with_flags(pattern: char*, flags: int) -> Regex {
+        // regcomp must not be handed a null pattern.
+        if (pattern == 0) {
+            return Regex { preg: 0, pattern: 0, flags: flags };
+        }
+        // NOTE(review): 1024 bytes is assumed to be large enough to hold a
+        // regex_t on every supported platform — confirm, or size it from the type.
+        let preg = malloc(1024);
+        // Allocation can fail; never pass a null buffer to regcomp.
+        if (preg == 0) {
+            return Regex { preg: 0, pattern: 0, flags: flags };
+        }
+        let status = regcomp(preg, pattern, flags);
+        if (status != 0) {
+            // The buffer is only freed here; regfree is not called on this path.
+            free(preg);
+            return Regex { preg: 0, pattern: 0, flags: flags };
+        }
+        return Regex { preg: preg, pattern: pattern, flags: flags };
+    }
+
+    // True when this Regex holds a compiled pattern (preg is non-zero).
+    fn is_valid(self) -> bool {
+        if (self.preg == 0) { return false; }
+        return true;
+    }
+
+    // Runs regexec on `text` and returns true when it reports success (0).
+    // An invalid Regex never matches. No match offsets are requested.
+    fn match(self, text: char*) -> bool {
+        if (self.preg != 0) {
+            let status = regexec(self.preg, text, 0, 0, 0);
+            return status == 0;
+        }
+        return false;
+    }
+
+    // Currently identical to match(): it forwards `text` and returns the result.
+    // NOTE(review): the name suggests a whole-string match, but nothing here
+    // requires the match to span all of `text` — confirm intended behavior.
+    fn match_full(self, text: char*) -> bool {
+        let matched = self.match(text);
+        return matched;
+    }
+
+    // Runs regexec on the part of `text` that begins at `offset`.
+    // Returns false for an invalid Regex or when `offset` lies outside
+    // 0..strlen(text) (the end position itself is accepted).
+    // NOTE(review): regexec presumably searches the whole remainder rather than
+    // anchoring at `offset`, so a match further right also counts — confirm.
+    fn match_at(self, text: char*, offset: int) -> bool {
+        if (self.preg == 0) { return false; }
+        let text_len = strlen(text);
+        let in_range = offset >= 0 && offset <= text_len;
+        if (!in_range) { return false; }
+        let tail = text + offset;
+        let status = regexec(self.preg, tail, 0, 0, 0);
+        return status == 0;
+    }
+
+    // Alias for match(): forwards `text` and returns its result.
+    fn is_match(self, text: char*) -> bool {
+        let matched = self.match(text);
+        return matched;
+    }
+
+    // Looks for a match in `text`; returns Some(Match) when regexec succeeds
+    // on some suffix of `text`, otherwise None (also None for an invalid Regex).
+    //
+    // regexec is always called with nmatch = 0, so it never reports match
+    // offsets; start and len are derived from repeated calls on suffixes.
+    // NOTE(review): regexec is presumably an unanchored search (POSIX). If so,
+    // the outer loop already succeeds at i = 0 whenever any match exists, and j
+    // counts how many leading positions still have a match somewhere to their
+    // right, not the length of the match. Confirm, and consider obtaining the
+    // real span through regmatch_t.
+    fn find(self, text: char*) -> Option<Match> {
+        if (self.preg == 0) { return Option<Match>::None(); }
+        let t_len = strlen(text);
+        // i may equal t_len, so the empty suffix is probed as well.
+        for (let i = 0; i <= t_len; i = i + 1) {
+            let sub = text + i;
+            if (regexec(self.preg, sub, 0, 0, 0) == 0) {
+                let j = 0;
+                // Advance j while not at the terminator and the suffix at i + j still matches.
+                while (text[i + j] != 0 && regexec(self.preg, sub, 0, 0, 0) == 0) {
+                    j = j + 1;
+                    sub = text + i + j;
+                }
+                // Match.text points into `text` (at i); it is not a copy.
+                return Option<Match>::Some(Match::new(text + i, i, j));
+            }
+        }
+        return Option<Match>::None();
+    }
+
+    // Runs find() on the part of `text` that begins at `start`.
+    // Returns None when `start` is negative or not below strlen(text).
+    // The Match comes straight from find() on `text + start`, so its `start`
+    // field is whatever find() computed for that shifted pointer; it is not
+    // adjusted back to be relative to `text`.
+    fn find_at(self, text: char*, start: int) -> Option<Match> {
+        let text_len = strlen(text);
+        if (start >= 0 && start < text_len) {
+            return self.find(text + start);
+        }
+        return Option<Match>::None();
+    }
+
+    // Walks `text` from position 0 and counts the consecutive positions whose
+    // suffix makes regexec succeed, stopping at the first failure.
+    // Returns 0 for an invalid Regex.
+    // NOTE(review): with an unanchored regexec this is presumably the index
+    // just past the last match start (e.g. 6 for "a" in "banana"), not the
+    // number of non-overlapping matches — confirm intended semantics.
+    fn count(self, text: char*) -> int {
+        if (self.preg == 0) { return 0; }
+        let total = 0;
+        let limit = strlen(text);
+        for (let pos = 0; pos < limit; pos = pos + 1) {
+            if (regexec(self.preg, text + pos, 0, 0, 0) != 0) {
+                break;
+            }
+            total = total + 1;
+        }
+        return total;
+    }
+
+    // Splits `text` into pieces and returns them as a Vec<String>.
+    // An invalid Regex yields a single element built from the whole text.
+    //
+    // Each position is probed with regexec on the suffix starting there, with
+    // nmatch = 0, so the matched span is never known; every position whose
+    // suffix reports a match is treated as a one-character separator.
+    // NOTE(review): if regexec searches unanchored (as POSIX specifies), every
+    // position up to the last match start is consumed as a separator, so e.g.
+    // "a,b,c" split on "," would presumably yield only "c" — confirm and
+    // consider using regmatch_t offsets.
+    fn split(self, text: char*) -> Vec<String> {
+        let parts = Vec<String>::new();
+        if (self.preg == 0) {
+            parts.push(String::from(text));
+            return parts;
+        }
+        let t_len = strlen(text);
+        // last_pos: first position not yet emitted; pos: position being probed.
+        let last_pos = 0;
+        let pos = 0;
+        while (pos < t_len) {
+            let sub = text + pos;
+            if (regexec(self.preg, sub, 0, 0, 0) == 0) {
+                if (pos > last_pos) {
+                    // Copy text[last_pos .. pos) into a 0-terminated char vector.
+                    let before = text + last_pos;
+                    let part_len = pos - last_pos;
+                    let v = Vec<char>::new();
+                    for (let i = 0; i < part_len; i = i + 1) {
+                        v.push(before[i]);
+                    }
+                    v.push(0);
+                    // The String is built directly around that vector.
+                    parts.push(String { vec: v });
+                }
+                // Skip exactly one character as the separator.
+                last_pos = pos + 1;
+                pos = pos + 1;
+            } else {
+                pos = pos + 1;
+            }
+        }
+        // Whatever follows the last separator position becomes the final piece.
+        if (last_pos < t_len) {
+            parts.push(String::from(text + last_pos));
+        }
+        return parts;
+    }
+
+    // Returns the stored pattern pointer (0 when compilation failed).
+    fn pattern(self) -> char* {
+        let stored = self.pattern;
+        return stored;
+    }
+
+    // Returns the flags value this Regex was created with.
+    fn flags(self) -> int {
+        let stored = self.flags;
+        return stored;
+    }
+
+    // Reports whether `pattern` compiles with the default flags.
+    // A temporary Regex is compiled, inspected and destroyed again.
+    fn is_valid_pattern(pattern: char*) -> bool {
+        let probe = Regex::compile(pattern);
+        // Same check as is_valid(): a non-zero preg means compilation succeeded.
+        let ok = probe.preg != 0;
+        probe.destroy();
+        return ok;
+    }
+
+    // Releases the compiled regex: regfree on the buffer, then free of the
+    // buffer itself. Does nothing when the Regex is invalid.
+    //
+    // The handle is cleared afterwards so that a repeated destroy(), or a
+    // match()/find() issued after destroy(), sees an invalid Regex instead of
+    // touching freed memory.
+    // NOTE(review): this assumes `self` refers to the caller's object (as the
+    // mutating Vec::push calls in this file suggest) — confirm. Other copies
+    // of the struct still hold the old pointer and must not be used.
+    fn destroy(self) {
+        if (self.preg != 0) {
+            regfree(self.preg);
+            free(self.preg);
+            self.preg = 0;
+        }
+    }
+}
+
+// One-shot helper: compiles `pattern` with the default flags, runs match()
+// on `text`, destroys the regex and returns the outcome.
+// An uncompilable pattern yields false.
+fn regex_match(pattern: char*, text: char*) -> bool {
+    let compiled = Regex::compile(pattern);
+    let matched = compiled.match(text);
+    compiled.destroy();
+    return matched;
+}
+
+// One-shot helper: compiles `pattern` with the default flags, runs find()
+// on `text`, destroys the regex and returns the Option<Match>.
+// An uncompilable pattern yields None.
+fn regex_find(pattern: char*, text: char*) -> Option<Match> {
+    let compiled = Regex::compile(pattern);
+    let found = compiled.find(text);
+    compiled.destroy();
+    return found;
+}
+
+// One-shot helper: compiles `pattern` with the default flags, runs count()
+// on `text`, destroys the regex and returns the number.
+// An uncompilable pattern yields 0.
+fn regex_count(pattern: char*, text: char*) -> int {
+    let compiled = Regex::compile(pattern);
+    let total = compiled.count(text);
+    compiled.destroy();
+    return total;
+}
+
+// One-shot helper: compiles `pattern` with the default flags, runs split()
+// on `text`, destroys the regex and returns the pieces.
+// An uncompilable pattern yields a single piece built from the whole text.
+fn regex_split(pattern: char*, text: char*) -> Vec<String> {
+    let compiled = Regex::compile(pattern);
+    let pieces = compiled.split(text);
+    compiled.destroy();
+    return pieces;
+}
diff --git a/tests/std/test_regex.zc b/tests/std/test_regex.zc
new file mode 100644
index 0000000..4fe176c
--- /dev/null
+++ b/tests/std/test_regex.zc
@@ -0,0 +1,187 @@
+import "std/regex.zc"
+
+// Checks a literal pattern against an exact, a containing and an unrelated string.
+fn test_basic_matching() {
+    "testing: basic matching";
+    let abc = Regex::compile("abc");
+
+    let exact = abc.match("abc");
+    if (exact) { "literal match works"; } else { "FAILED: literal match"; }
+
+    let inside = abc.match("abcdef");
+    if (inside) { "substring match works"; } else { "FAILED: substring match"; }
+
+    // Branches flipped relative to a negated condition; the output is the same.
+    let unrelated = abc.match("xyz");
+    if (unrelated) { "FAILED: mismatching"; } else { "not matching correctly returns false"; }
+
+    abc.destroy();
+    "";
+}
+
+// Checks the ^ and $ anchors, each with one accepted and one rejected input.
+fn test_anchors() {
+    "testing: anchors";
+    let starts = Regex::compile("^start");
+
+    let at_start = starts.match("start here");
+    if (at_start) { " ^ anchor works for start"; } else { "FAILED: ^ anchor start"; }
+
+    let not_at_start = starts.match("no start");
+    if (not_at_start) { "FAILED: ^ anchor reject"; } else { " ^ anchor rejects non-start"; }
+
+    starts.destroy();
+
+    let ends = Regex::compile("end$");
+
+    let at_end = ends.match("the end");
+    if (at_end) { " $ anchor works for end"; } else { "FAILED: $ anchor end"; }
+
+    let not_at_end = ends.match("end here");
+    if (not_at_end) { "FAILED: $ anchor reject"; } else { " $ anchor rejects non-end"; }
+
+    ends.destroy();
+    "";
+}
+
+// Checks that "." stands for exactly one character.
+fn test_wildcards() {
+    "testing: wild cards";
+    let dot = Regex::compile("a.c");
+
+    let with_b = dot.match("abc");
+    if (with_b) { " . matches single char"; } else { "FAILED: . match 1"; }
+
+    let with_x = dot.match("axc");
+    if (with_x) { " . matches different char"; } else { "FAILED: . match 2"; }
+
+    let nothing_between = dot.match("ac");
+    if (nothing_between) { "FAILED: . match 3"; } else { " . requires exactly one char"; }
+
+    dot.destroy();
+    "";
+}
+
+// Checks the *, + and ? quantifiers.
+// Each check prints a confirmation line on success and a "FAILED: ..." line
+// otherwise; nothing aborts the run.
+fn test_quantifiers() {
+    "testing: quantifiers";
+    let re1 = Regex::compile("a*b");
+    if (re1.match("b")) { " * matches zero occurrences"; } else { "FAILED: * 0"; }
+    if (re1.match("ab")) { " * matches one occurrence"; } else { "FAILED: * 1"; }
+    if (re1.match("aaab")) { " * matches multiple occurrences"; } else { "FAILED: * many"; }
+    re1.destroy();
+
+    let re2 = Regex::compile("a+b");
+    if (!re2.match("b")) { " + requires at least one"; } else { "FAILED: + 0"; }
+    if (re2.match("ab")) { " + matches one occurrence"; } else { "FAILED: + 1"; }
+    if (re2.match("aaab")) { " + matches multiple occurrences"; } else { "FAILED: + many"; }
+    re2.destroy();
+
+    let re3 = Regex::compile("colou?r");
+    // "color" is the spelling WITHOUT the optional "u", "colour" the one WITH
+    // it; the messages are paired with the inputs accordingly.
+    if (re3.match("color")) { " ? matches without char"; } else { "FAILED: ? without"; }
+    if (re3.match("colour")) { " ? matches with char"; } else { "FAILED: ? with"; }
+    re3.destroy();
+    "";
+}
+
+// Checks a bracket expression ([0-9]+) against digit-only, mixed and
+// digit-free inputs. Prints a confirmation or a "FAILED: ..." line per check.
+fn test_character_classes() {
+    // Terminated with ';' like every other banner statement in this file.
+    "testing: character class stuff";
+    let re = Regex::compile("[0-9]+");
+
+    if (re.match("123")) { " [0-9] matches digits"; } else { "FAILED: [0-9] match"; }
+    if (re.match("abc123")) { " [0-9] finds digits in string"; } else { "FAILED: [0-9] find"; }
+    if (!re.match("abc")) { " [0-9] rejects non-digits"; } else { "FAILED: [0-9] reject"; }
+
+    re.destroy();
+    "";
+}
+
+// Checks the | operator with both alternatives and one non-matching input.
+fn test_alternation() {
+    "test: alternation";
+    let pets = Regex::compile("cat|dog");
+
+    let first = pets.match("cat");
+    if (first) { " | matches first alternative"; } else { "FAILED: | match 1"; }
+
+    let second = pets.match("dog");
+    if (second) { " | matches second alternative"; } else { "FAILED: | match 2"; }
+
+    let neither = pets.match("bird");
+    if (neither) { "FAILED: | reject"; } else { " | rejects non-matching"; }
+
+    pets.destroy();
+    "";
+}
+
+// Checks a letter class ([a-zA-Z]+) against letters, letters plus digits, and digits only.
+fn test_word_boundaries() {
+    "testing: word matching";
+    let letters = Regex::compile("[a-zA-Z]+");
+
+    let word = letters.match("hello");
+    if (word) { " letter class matches words"; } else { "FAILED: letter match"; }
+
+    let word_and_digits = letters.match("hello123");
+    if (word_and_digits) { " letter class finds word part"; } else { "FAILED: letter part"; }
+
+    let digits_only = letters.match("123");
+    if (digits_only) { "FAILED: letter reject"; } else { " letter class rejects non-letters"; }
+
+    letters.destroy();
+    "";
+}
+
+// Checks Regex::is_valid_pattern with two patterns that must compile and one
+// that must not. Prints a confirmation or a "FAILED: ..." line per check.
+fn test_is_valid() {
+    // Terminated with ';' like every other banner statement in this file.
+    "testing: patern validation";
+
+    if (Regex::is_valid_pattern("^[a-z]+$")) { " valid pattern accepted"; } else { "FAILED: pattern validation 1"; }
+    if (Regex::is_valid_pattern("(hello|world)")) { " complex pattern accepted"; } else { "FAILED: pattern validation 2"; }
+    // An unterminated bracket expression must be rejected by regcomp; without
+    // this case the failure path of is_valid_pattern is never exercised.
+    if (!Regex::is_valid_pattern("[a-z")) { " invalid pattern rejected"; } else { "FAILED: pattern validation 3"; }
+
+    "";
+}
+
+// Checks that find() reports a match for digits embedded in a string.
+// Only the presence of a result is checked, not its position or length.
+fn test_find() {
+    "testing: find functionality";
+    let digits = Regex::compile("[0-9]+");
+    let found = digits.find("abc123def456");
+
+    let present = found.is_some();
+    if (present) { " find locates match"; } else { "FAILED: find match"; }
+
+    digits.destroy();
+    "";
+}
+
+// Checks that count() returns at least 1 for a string containing digits.
+// The exact number is not pinned.
+fn test_count() {
+    "testing: count";
+    let digits = Regex::compile("[0-9]+");
+    let total = digits.count("123 456 789");
+
+    if (total < 1) { "FAILED: count matches"; } else { " count finds matches"; }
+
+    digits.destroy();
+    "";
+}
+
+// Exercises the one-shot helpers regex_match, regex_count and regex_find.
+fn test_convenience_functions() {
+    "testing: just some other functions and stuff";
+
+    let prefix_ok = regex_match("^test", "testing");
+    if (prefix_ok) { " regex_match works"; } else { "FAILED: regex_match"; }
+
+    let a_total = regex_count("a", "banana");
+    if (a_total < 1) { "FAILED: regex_count"; } else { " regex_count works"; }
+
+    let found = regex_find("[0-9]+", "id: 42");
+    let present = found.is_some();
+    if (present) { " regex_find works"; } else { "FAILED: regex_find"; }
+
+    "";
+}
+
+// Checks an anchored e-mail-like pattern against two accepted addresses and
+// one rejected string. Prints a confirmation or a "FAILED: ..." line per check.
+fn test_email_pattern() {
+    // Terminated with ';' like every other banner statement in this file.
+    "test: email pattern stuff";
+    let email_re = Regex::compile("^[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z][a-zA-Z]+$");
+
+    if (email_re.match("swag@swag.com")) { " valid email accepted"; } else { "FAILED: valid email"; }
+    if (email_re.match("swag.swag@swag.swag.swag")) { " complex email accepted"; } else { "FAILED: complex email"; }
+    if (!email_re.match("invalid.email")) { " invalid email rejected"; } else { "FAILED: invalid email reject"; }
+
+    email_re.destroy();
+    "";
+}
+
+// Checks an http/https URL pattern against two accepted URLs and one ftp URL
+// that must not match. Prints a confirmation or a "FAILED: ..." line per check.
+fn test_url_pattern() {
+    // Terminated with ';' like every other banner statement in this file.
+    "testing: url pattern stuff";
+    let url_re = Regex::compile("https?://[a-zA-Z0-9.-]+");
+
+    if (url_re.match("http://example.com")) { " http url matched matched"; } else { "FAILED: http url"; }
+    if (url_re.match("https://secure.example.com")) { " https url  matched"; } else { "FAILED: https url"; }
+    if (!url_re.match("ftp://something.com")) { " ftp url rejected"; } else { "FAILED: ftp url reject"; }
+
+    url_re.destroy();
+    "";
+}
+
+// Test entry point: runs every test function in a fixed order.
+// Failures are only reported as "FAILED: ..." lines written by the individual
+// checks; nothing here inspects results or changes how the program ends.
+fn main() {
+    "testing....";
+
+    test_basic_matching();
+    test_anchors();
+    test_wildcards();
+    test_quantifiers();
+    test_character_classes();
+    test_alternation();
+    test_word_boundaries();
+    test_is_valid();
+    test_find();
+    test_count();
+    test_convenience_functions();
+    test_email_pattern();
+    test_url_pattern();
+
+    // Printed unconditionally, even when some checks reported FAILED.
+    "all tests worked... (hopefully.. look around for \"FAILED\" messages)";
+    "";
+}