summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- std.zc 1
-rw-r--r-- std/regex.zc 198
-rw-r--r-- tests/std/test_regex.zc 187
3 files changed, 386 insertions, 0 deletions
diff --git a/std.zc b/std.zc
index 4793c11..3dcc45a 100644
--- a/std.zc
+++ b/std.zc
@@ -18,5 +18,6 @@ import "./std/stack.zc"
import "./std/queue.zc"
import "./std/env.zc"
import "./std/slice.zc"
+import "./std/regex.zc"
import "./std/process.zc"
diff --git a/std/regex.zc b/std/regex.zc
new file mode 100644
index 0000000..f64b36e
--- /dev/null
+++ b/std/regex.zc
@@ -0,0 +1,198 @@
+include <regex.h>
+
+import "./core.zc"
+import "./string.zc"
+import "./vec.zc"
+import "./option.zc"
+
+// Plain record describing one regex match; built through Match::new.
+struct Match {
+ // Pointer handed to Match::new. Regex::find passes a pointer into the
+ // searched text, so this is a borrowed view and not a copy.
+ text: char*;
+ // Offset value supplied by the creator of the Match.
+ start: int;
+ // Length value supplied by the creator of the Match.
+ len: int;
+}
+
+impl Match {
+    // Bundles the three values into a Match. Nothing is copied: `text` is
+    // stored as the raw pointer it was given.
+    fn new(text: char*, start: int, len: int) -> Match {
+        let built = Match { text: text, start: start, len: len };
+        return built;
+    }
+
+    // Returns the stored text pointer unchanged.
+    // NOTE(review): the pointer is not cut off after `len` characters, so the
+    // returned C string presumably continues past the match - confirm that
+    // callers expect this.
+    fn as_string(self) -> char* {
+        let view = self.text;
+        return view;
+    }
+
+    // Offset one past the last matched character (start plus len).
+    fn end(self) -> int {
+        let stop = self.len + self.start;
+        return stop;
+    }
+}
+
+// Handle around a pattern compiled with regcomp() from <regex.h>.
+struct Regex {
+ // Heap buffer holding the compiled pattern; 0 when compilation failed
+ // (see Regex::is_valid).
+ preg: void*;
+ // The pattern pointer as passed to the constructor (not copied); 0 when
+ // compilation failed.
+ pattern: char*;
+ // The flags value that was passed to regcomp().
+ flags: int;
+}
+
+// C-side helpers for Regex. They live in a raw block because they need
+// regex_t, regmatch_t and the REG_* macros from <regex.h>.
+raw {
+    /* Allocates storage for exactly one regex_t; NULL when malloc fails. */
+    static void *_z_regex_alloc(void) {
+        return malloc(sizeof(regex_t));
+    }
+
+    /* Default compile flags: extended syntax, case-insensitive matching. */
+    static int _z_regex_default_flags(void) {
+        return REG_EXTENDED | REG_ICASE;
+    }
+
+    /*
+     * Finds the leftmost match at or after byte offset of text.
+     * On success stores the match position (relative to text, not to the
+     * offset) and its length, then returns 1. Returns 0 when nothing matches
+     * or when the pattern was compiled with REG_NOSUB, because regexec then
+     * reports no offsets.
+     */
+    static int _z_regex_search(void *preg, const char *text, int offset, int cflags, int *start, int *len) {
+        regmatch_t pm;
+        int eflags = 0;
+        if (cflags & REG_NOSUB) {
+            return 0;
+        }
+        /* When resuming inside the text, ^ must not match at the resume
+           point unless REG_NEWLINE is active and a newline precedes it. */
+        if (offset > 0 && !((cflags & REG_NEWLINE) && text[offset - 1] == '\n')) {
+            eflags = REG_NOTBOL;
+        }
+        if (regexec((regex_t *)preg, text + offset, 1, &pm, eflags) != 0) {
+            return 0;
+        }
+        *start = offset + (int)pm.rm_so;
+        *len = (int)(pm.rm_eo - pm.rm_so);
+        return 1;
+    }
+}
+
+impl Regex {
+    // Compiles `pattern` with the default flags: POSIX extended syntax and
+    // case-insensitive matching. These are the symbolic names for the former
+    // literal `1 | 2` on common libcs.
+    // Check is_valid() on the result and release it with destroy().
+    fn compile(pattern: char*) -> Regex {
+        return Regex::compile_with_flags(pattern, _z_regex_default_flags());
+    }
+
+    // Compiles `pattern` with explicit regcomp() flags.
+    // Returns an invalid Regex (preg == 0) when `pattern` is null, when the
+    // allocation fails, or when regcomp() rejects the pattern.
+    // The pattern pointer is stored, not copied.
+    fn compile_with_flags(pattern: char*, flags: int) -> Regex {
+        if (pattern == 0) {
+            return Regex { preg: 0, pattern: 0, flags: flags };
+        }
+        // Sized from sizeof(regex_t) instead of a guessed constant.
+        let preg = _z_regex_alloc();
+        if (preg == 0) {
+            return Regex { preg: 0, pattern: 0, flags: flags };
+        }
+        let status = regcomp(preg, pattern, flags);
+        if (status != 0) {
+            // After a failed regcomp() there is nothing to regfree(); only
+            // the raw storage has to be released.
+            free(preg);
+            return Regex { preg: 0, pattern: 0, flags: flags };
+        }
+        return Regex { preg: preg, pattern: pattern, flags: flags };
+    }
+
+    // True when compilation succeeded.
+    fn is_valid(self) -> bool {
+        return self.preg != 0;
+    }
+
+    // True when the pattern matches anywhere in `text` (unanchored search).
+    // An invalid Regex never matches.
+    fn match(self, text: char*) -> bool {
+        if (self.preg == 0) { return false; }
+        return regexec(self.preg, text, 0, 0, 0) == 0;
+    }
+
+    // True only when the pattern matches the whole of `text`.
+    // regexec() reports the leftmost-longest match, so a whole-text match
+    // exists exactly when that match starts at 0 and covers strlen(text).
+    // Always false for patterns compiled with REG_NOSUB (no offsets).
+    fn match_full(self, text: char*) -> bool {
+        if (self.preg == 0) { return false; }
+        let m_start: int = 0;
+        let m_len: int = 0;
+        if (_z_regex_search(self.preg, text, 0, self.flags, &m_start, &m_len) == 0) {
+            return false;
+        }
+        return m_start == 0 && m_len == strlen(text);
+    }
+
+    // True when the pattern matches anywhere in the part of `text` that
+    // starts at `offset`. Offsets outside 0..strlen(text) yield false.
+    fn match_at(self, text: char*, offset: int) -> bool {
+        if (self.preg == 0) { return false; }
+        let len = strlen(text);
+        if (offset < 0 || offset > len) { return false; }
+        return regexec(self.preg, text + offset, 0, 0, 0) == 0;
+    }
+
+    // Alias of match().
+    fn is_match(self, text: char*) -> bool {
+        return self.match(text);
+    }
+
+    // Returns the leftmost match in `text`, or None.
+    // Match.start and Match.len are offsets into `text`; Match.text points at
+    // the first matched character inside `text` (borrowed, not copied).
+    fn find(self, text: char*) -> Option<Match> {
+        return self.find_at(text, 0);
+    }
+
+    // Like find(), but only considers matches starting at or after `start`.
+    // Match.start is still relative to the beginning of `text`.
+    // Returns None for an invalid Regex or a `start` outside 0..strlen(text).
+    fn find_at(self, text: char*, start: int) -> Option<Match> {
+        if (self.preg == 0) { return Option<Match>::None(); }
+        let len = strlen(text);
+        if (start < 0 || start > len) {
+            return Option<Match>::None();
+        }
+        let m_start: int = 0;
+        let m_len: int = 0;
+        if (_z_regex_search(self.preg, text, start, self.flags, &m_start, &m_len) == 0) {
+            return Option<Match>::None();
+        }
+        return Option<Match>::Some(Match::new(text + m_start, m_start, m_len));
+    }
+
+    // Counts the non-overlapping matches in `text`, scanning left to right.
+    // Empty matches are counted too (an empty-matching pattern therefore
+    // matches once at every position, including the end of the text).
+    fn count(self, text: char*) -> int {
+        if (self.preg == 0) { return 0; }
+        let total = 0;
+        let pos = 0;
+        let t_len = strlen(text);
+        while (pos <= t_len) {
+            let m_start: int = 0;
+            let m_len: int = 0;
+            if (_z_regex_search(self.preg, text, pos, self.flags, &m_start, &m_len) == 0) {
+                break;
+            }
+            total = total + 1;
+            if (m_len > 0) {
+                // Resume right after the match so matches never overlap.
+                pos = m_start + m_len;
+            } else {
+                // Step over an empty match, otherwise the loop never ends.
+                pos = m_start + 1;
+            }
+        }
+        return total;
+    }
+
+    // Splits `text` at every non-empty match of the pattern.
+    // Empty pieces (leading, trailing, or between adjacent delimiters) are
+    // dropped. An invalid Regex yields a single piece holding all of `text`.
+    fn split(self, text: char*) -> Vec<String> {
+        let parts = Vec<String>::new();
+        if (self.preg == 0) {
+            parts.push(String::from(text));
+            return parts;
+        }
+        let t_len = strlen(text);
+        let last_pos = 0;
+        let pos = 0;
+        while (pos <= t_len) {
+            let m_start: int = 0;
+            let m_len: int = 0;
+            if (_z_regex_search(self.preg, text, pos, self.flags, &m_start, &m_len) == 0) {
+                break;
+            }
+            if (m_len == 0) {
+                // An empty match cannot act as a delimiter; look further on.
+                pos = m_start + 1;
+            } else {
+                if (m_start > last_pos) {
+                    // Copy text[last_pos .. m_start) into a fresh String.
+                    let v = Vec<char>::new();
+                    for (let i = last_pos; i < m_start; i = i + 1) {
+                        v.push(text[i]);
+                    }
+                    v.push(0);
+                    parts.push(String { vec: v });
+                }
+                last_pos = m_start + m_len;
+                pos = last_pos;
+            }
+        }
+        if (last_pos < t_len) {
+            parts.push(String::from(text + last_pos));
+        }
+        return parts;
+    }
+
+    // The pattern pointer this Regex was compiled from (0 when invalid).
+    fn pattern(self) -> char* {
+        return self.pattern;
+    }
+
+    // The flags this Regex was compiled with.
+    fn flags(self) -> int {
+        return self.flags;
+    }
+
+    // True when `pattern` compiles with the default flags. The temporary
+    // Regex is released before returning.
+    fn is_valid_pattern(pattern: char*) -> bool {
+        let test_regex = Regex::compile(pattern);
+        let valid = test_regex.is_valid();
+        test_regex.destroy();
+        return valid;
+    }
+
+    // Releases the compiled pattern and its storage. Does nothing for an
+    // invalid Regex. `self` is received by value, so the handle held by the
+    // caller is not reset; do not use it or destroy it again afterwards.
+    fn destroy(self) {
+        if (self.preg != 0) {
+            regfree(self.preg);
+            free(self.preg);
+        }
+    }
+}
+
+// One-shot helper: compiles `pattern`, tests it against `text` with
+// Regex::match, releases the compiled pattern and returns the outcome.
+fn regex_match(pattern: char*, text: char*) -> bool {
+    let compiled = Regex::compile(pattern);
+    let matched = compiled.match(text);
+    compiled.destroy();
+    return matched;
+}
+
+// One-shot helper: compiles `pattern`, runs Regex::find on `text`, releases
+// the compiled pattern and returns whatever find produced.
+fn regex_find(pattern: char*, text: char*) -> Option<Match> {
+    let compiled = Regex::compile(pattern);
+    let found = compiled.find(text);
+    compiled.destroy();
+    return found;
+}
+
+// One-shot helper: compiles `pattern`, runs Regex::count on `text`, releases
+// the compiled pattern and returns the count.
+fn regex_count(pattern: char*, text: char*) -> int {
+    let compiled = Regex::compile(pattern);
+    let total = compiled.count(text);
+    compiled.destroy();
+    return total;
+}
+
+// One-shot helper: compiles `pattern`, runs Regex::split on `text`, releases
+// the compiled pattern and returns the pieces.
+fn regex_split(pattern: char*, text: char*) -> Vec<String> {
+    let compiled = Regex::compile(pattern);
+    let pieces = compiled.split(text);
+    compiled.destroy();
+    return pieces;
+}
diff --git a/tests/std/test_regex.zc b/tests/std/test_regex.zc
new file mode 100644
index 0000000..4fe176c
--- /dev/null
+++ b/tests/std/test_regex.zc
@@ -0,0 +1,187 @@
+import "std/regex.zc"
+
+// Literal pattern: must match itself and a longer text containing it, and
+// must not match unrelated text.
+fn test_basic_matching() {
+    "testing: basic matching";
+    let literal = Regex::compile("abc");
+
+    if (!literal.match("abc")) { "FAILED: literal match"; } else { "literal match works"; }
+    if (!literal.match("abcdef")) { "FAILED: substring match"; } else { "substring match works"; }
+    if (literal.match("xyz")) { "FAILED: mismatching"; } else { "not matching correctly returns false"; }
+
+    literal.destroy();
+    "";
+}
+
+// Exercises the ^ and $ anchors, each with one accepted and one rejected
+// input.
+fn test_anchors() {
+    "testing: anchors";
+    let head = Regex::compile("^start");
+
+    if (!head.match("start here")) { "FAILED: ^ anchor start"; } else { " ^ anchor works for start"; }
+    if (head.match("no start")) { "FAILED: ^ anchor reject"; } else { " ^ anchor rejects non-start"; }
+
+    head.destroy();
+
+    let tail = Regex::compile("end$");
+    if (!tail.match("the end")) { "FAILED: $ anchor end"; } else { " $ anchor works for end"; }
+    if (tail.match("end here")) { "FAILED: $ anchor reject"; } else { " $ anchor rejects non-end"; }
+
+    tail.destroy();
+    "";
+}
+
+// The dot must stand for exactly one character between 'a' and 'c'.
+fn test_wildcards() {
+    "testing: wild cards";
+    let dotted = Regex::compile("a.c");
+
+    if (!dotted.match("abc")) { "FAILED: . match 1"; } else { " . matches single char"; }
+    if (!dotted.match("axc")) { "FAILED: . match 2"; } else { " . matches different char"; }
+    if (dotted.match("ac")) { "FAILED: . match 3"; } else { " . requires exactly one char"; }
+
+    dotted.destroy();
+    "";
+}
+
+// Covers the *, + and ? quantifiers, one compiled pattern per quantifier.
+fn test_quantifiers() {
+    "testing: quantifiers";
+    let star = Regex::compile("a*b");
+    if (!star.match("b")) { "FAILED: * 0"; } else { " * matches zero occurrences"; }
+    if (!star.match("ab")) { "FAILED: * 1"; } else { " * matches one occurrence"; }
+    if (!star.match("aaab")) { "FAILED: * many"; } else { " * matches multiple occurrences"; }
+    star.destroy();
+
+    let plus = Regex::compile("a+b");
+    if (plus.match("b")) { "FAILED: + 0"; } else { " + requires at least one"; }
+    if (!plus.match("ab")) { "FAILED: + 1"; } else { " + matches one occurrence"; }
+    if (!plus.match("aaab")) { "FAILED: + many"; } else { " + matches multiple occurrences"; }
+    plus.destroy();
+
+    let optional = Regex::compile("colou?r");
+    if (!optional.match("color")) { "FAILED: ? with"; } else { " ? matches with char"; }
+    if (!optional.match("colour")) { "FAILED: ? without"; } else { " ? matches without char"; }
+    optional.destroy();
+    "";
+}
+
+// Bracket expression [0-9]+: matches digit runs anywhere in the text and
+// rejects text without digits.
+fn test_character_classes() {
+    // The header statement now ends with ';' like every other test.
+    "testing: character class stuff";
+    let re = Regex::compile("[0-9]+");
+
+    if (re.match("123")) { " [0-9] matches digits"; } else { "FAILED: [0-9] match"; }
+    if (re.match("abc123")) { " [0-9] finds digits in string"; } else { "FAILED: [0-9] find"; }
+    if (!re.match("abc")) { " [0-9] rejects non-digits"; } else { "FAILED: [0-9] reject"; }
+
+    re.destroy();
+    "";
+}
+
+// Alternation: either branch must match, anything else must not.
+fn test_alternation() {
+    "test: alternation";
+    let pets = Regex::compile("cat|dog");
+
+    if (!pets.match("cat")) { "FAILED: | match 1"; } else { " | matches first alternative"; }
+    if (!pets.match("dog")) { "FAILED: | match 2"; } else { " | matches second alternative"; }
+    if (pets.match("bird")) { "FAILED: | reject"; } else { " | rejects non-matching"; }
+
+    pets.destroy();
+    "";
+}
+
+// Despite the name, this checks the letter class [a-zA-Z]+ rather than a
+// word-boundary assertion: letters match, pure digits do not.
+fn test_word_boundaries() {
+    "testing: word matching";
+    let letters = Regex::compile("[a-zA-Z]+");
+
+    if (!letters.match("hello")) { "FAILED: letter match"; } else { " letter class matches words"; }
+    if (!letters.match("hello123")) { "FAILED: letter part"; } else { " letter class finds word part"; }
+    if (letters.match("123")) { "FAILED: letter reject"; } else { " letter class rejects non-letters"; }
+
+    letters.destroy();
+    "";
+}
+
+// Regex::is_valid_pattern must accept well-formed patterns and reject a
+// malformed one.
+fn test_is_valid() {
+    "testing: pattern validation";
+
+    if (Regex::is_valid_pattern("^[a-z]+$")) { " valid pattern accepted"; } else { "FAILED: pattern validation 1"; }
+    if (Regex::is_valid_pattern("(hello|world)")) { " complex pattern accepted"; } else { "FAILED: pattern validation 2"; }
+    // An unterminated bracket expression cannot be compiled.
+    if (!Regex::is_valid_pattern("[a-z")) { " invalid pattern rejected"; } else { "FAILED: pattern validation 3"; }
+
+    "";
+}
+
+// Regex::find must not only report that a match exists but also where it
+// is: the first digit run in "abc123def456" starts at offset 3 and is 3
+// characters long.
+fn test_find() {
+    "testing: find functionality";
+    let re = Regex::compile("[0-9]+");
+    let m = re.find("abc123def456");
+
+    if (m.is_some()) { " find locates match"; } else { "FAILED: find match"; }
+
+    if (m.is_some()) {
+        let hit = m.unwrap();
+        if (hit.start == 3 && hit.len == 3) { " find reports match position"; } else { "FAILED: find position"; }
+    }
+
+    re.destroy();
+    "";
+}
+
+// Regex::count must report the number of separate matches: the text holds
+// exactly three digit runs.
+fn test_count() {
+    "testing: count";
+    let re = Regex::compile("[0-9]+");
+    let count = re.count("123 456 789");
+
+    // Checking for the exact value catches over-counting, which a
+    // `>= 1` check would let through.
+    if (count == 3) { " count finds matches"; } else { "FAILED: count matches"; }
+
+    re.destroy();
+    "";
+}
+
+// Smoke test for the one-shot wrappers regex_match, regex_count and
+// regex_find.
+fn test_convenience_functions() {
+    "testing: just some other functions and stuff";
+
+    if (!regex_match("^test", "testing")) { "FAILED: regex_match"; } else { " regex_match works"; }
+    if (regex_count("a", "banana") < 1) { "FAILED: regex_count"; } else { " regex_count works"; }
+
+    let found = regex_find("[0-9]+", "id: 42");
+    if (!found.is_some()) { "FAILED: regex_find"; } else { " regex_find works"; }
+
+    "";
+}
+
+// A fully anchored e-mail-like pattern: accepts two well-formed addresses
+// and rejects text without an '@'.
+fn test_email_pattern() {
+    // The header statement now ends with ';' like every other test.
+    "test: email pattern stuff";
+    let email_re = Regex::compile("^[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z][a-zA-Z]+$");
+
+    if (email_re.match("swag@swag.com")) { " valid email accepted"; } else { "FAILED: valid email"; }
+    if (email_re.match("swag.swag@swag.swag.swag")) { " complex email accepted"; } else { "FAILED: complex email"; }
+    if (!email_re.match("invalid.email")) { " invalid email rejected"; } else { "FAILED: invalid email reject"; }
+
+    email_re.destroy();
+    "";
+}
+
+// http/https URL prefix pattern: accepts both schemes and rejects ftp.
+fn test_url_pattern() {
+    // The header statement now ends with ';' like every other test.
+    "testing: url pattern stuff";
+    let url_re = Regex::compile("https?://[a-zA-Z0-9.-]+");
+
+    if (url_re.match("http://example.com")) { " http url matched"; } else { "FAILED: http url"; }
+    if (url_re.match("https://secure.example.com")) { " https url matched"; } else { "FAILED: https url"; }
+    if (!url_re.match("ftp://something.com")) { " ftp url rejected"; } else { "FAILED: ftp url reject"; }
+
+    url_re.destroy();
+    "";
+}
+
+// Entry point of the regex test program: runs every test function in a
+// fixed order. Failures are only reported through the "FAILED" messages the
+// individual tests emit; nothing here inspects their outcome.
+fn main() {
+ "testing....";
+
+ // Matching basics and pattern syntax.
+ test_basic_matching();
+ test_anchors();
+ test_wildcards();
+ test_quantifiers();
+ test_character_classes();
+ test_alternation();
+ test_word_boundaries();
+ // Regex API beyond plain matching.
+ test_is_valid();
+ test_find();
+ test_count();
+ test_convenience_functions();
+ // Larger, realistic patterns.
+ test_email_pattern();
+ test_url_pattern();
+
+ "all tests worked... (hopefully.. look around for \"FAILED\" messages)";
+ "";
+}