summary | refs | log | tree | commit | diff
path: root/regex.c
diff options
context:
space:
mode:
author: Bobby Bingham <koorogi@koorogi.info> 2017-07-23 23:07:17 -0500
committer: Bobby Bingham <koorogi@koorogi.info> 2017-07-24 21:19:10 -0500
commit: 4459545e6528c8f072de79b799b9ae9b85d01dfb (patch)
tree: 03e144fabc0e830662e40566a2eb7f98eafdf3a8 /regex.c
parent: c8a0b7157d544f2359f2373160dcc69cdef8f4de (diff)
Output regex instead of debugging format
Diffstat (limited to 'regex.c')
-rw-r--r-- regex.c 140
1 files changed, 140 insertions, 0 deletions
diff --git a/regex.c b/regex.c
new file mode 100644
index 0000000..154a5df
--- /dev/null
+++ b/regex.c
@@ -0,0 +1,140 @@
+#include <stdio.h>
+#include <string.h>
+
+#include "ast.h"
+
+/*
+ * Binding strength of each atom type when it is written out as a regex.
+ * A smaller value binds more tightly; print_child_atom() wraps a child in
+ * parentheses when the child's value is greater than its parent's.
+ * Slots that are not listed here are implicitly zero.
+ */
+static const int precedence[ATOM_MAX] = {
+    /* Level 4: alternation binds loosest. */
+    [ATOM_ALTERNATION] = 4,
+
+    /* Level 3: concatenation, and literals (treated like a sequence). */
+    [ATOM_LITERAL]     = 3,
+    [ATOM_SEQUENCE]    = 3,
+
+    /* Level 2: a quantified sub-expression. */
+    [ATOM_REPETITION]  = 2,
+
+    /* Level 1: single-character matchers. */
+    [ATOM_EVERYTHING]  = 1,
+    [ATOM_PUNCTUATION] = 1,
+    [ATOM_CONTROL]     = 1,
+    [ATOM_LOWERCASE]   = 1,
+    [ATOM_UPPERCASE]   = 1,
+    [ATOM_NUMERIC]     = 1,
+    [ATOM_ALPHABETIC]  = 1,
+};
+
+/*
+ * Write the quantifier described by `r` to stdout.
+ *
+ * A negative `max` is handled as "no upper bound" and yields "*", "+" or
+ * "{min,}". With a non-negative `max` the output is "?" for 0..1, "{n}" when
+ * both bounds are equal, and "{min,max}" otherwise.
+ */
+static void print_repeat(const struct repeat *r)
+{
+    int unbounded = r->max < 0;
+
+    if (unbounded && r->min == 0) {
+        printf("*");
+        return;
+    }
+    if (unbounded && r->min == 1) {
+        printf("+");
+        return;
+    }
+    if (unbounded) {
+        printf("{%ld,}", r->min);
+        return;
+    }
+
+    /* From here on the upper bound is explicit. */
+    if (r->min == 0 && r->max == 1) {
+        printf("?");
+        return;
+    }
+    if (r->min == r->max) {
+        printf("{%ld}", r->min);
+        return;
+    }
+    printf("{%ld,%ld}", r->min, r->max);
+}
+
+/*
+ * Write the NUL-terminated string `s` to stdout, putting a backslash in
+ * front of each of the characters . [ { } ( ) \ * + ? | ^ $ so that they
+ * are emitted in escaped form. All other bytes are written unchanged.
+ */
+static void print_literal(const char *s)
+{
+    static const char metachars[] = ".[{}()\\*+?|^$";
+    const char *p;
+
+    for (p = s; *p != '\0'; p++) {
+        /* *p is never NUL here, so strchr cannot match the terminator. */
+        if (strchr(metachars, *p) != NULL)
+            putchar('\\');
+        putchar(*p);
+    }
+}
+
+/*
+ * Return the effective precedence of atom `a` for grouping decisions.
+ *
+ * - A repetition whose counts are exactly {1,1} prints as its child alone,
+ *   so it takes the child's precedence.
+ * - A literal depends on its length: an empty literal yields -1, a one-byte
+ *   literal ranks with the single-character matchers, and anything longer
+ *   ranks as a sequence.
+ * - Every other atom uses its entry in the precedence table.
+ */
+static int get_precedence(const struct atom *a)
+{
+    size_t len;
+
+    /* Step through transparent {1,1} repetitions down to the real atom. */
+    while (a->type == ATOM_REPETITION
+           && a->u.repeat.counts.min == 1
+           && a->u.repeat.counts.max == 1)
+        a = a->u.repeat.child;
+
+    if (a->type != ATOM_LITERAL)
+        return precedence[a->type];
+
+    len = strlen(a->u.literal);
+    if (len == 0)
+        return -1;
+    if (len == 1)
+        return precedence[ATOM_EVERYTHING];
+    return precedence[ATOM_SEQUENCE];
+}
+
+static void print_atom(const struct atom *a);
+
+/*
+ * Print `child` as an operand of `parent`, wrapping it in parentheses when
+ * that is needed to keep the printed regex equivalent to the tree.
+ *
+ * Parentheses are added when:
+ *  - the child binds more loosely than the parent (larger precedence
+ *    value), or
+ *  - both parent and child are at repetition level. Without the group a
+ *    nested repetition would print as two adjacent quantifiers (e.g. "a+*"
+ *    or "a{2}{3}"), which POSIX leaves undefined and which some engines read
+ *    as a lazy or possessive quantifier.
+ */
+static void print_child_atom(const struct atom *child, const struct atom *parent)
+{
+    int child_prec  = get_precedence(child);
+    int parent_prec = get_precedence(parent);
+    int stacked_quantifiers = child_prec == parent_prec
+        && parent_prec == precedence[ATOM_REPETITION];
+    int group = child_prec > parent_prec || stacked_quantifiers;
+
+    if (group) printf("(");
+    print_atom(child);
+    if (group) printf(")");
+}
+
+/*
+ * Write the regex text for atom `a`, including its whole subtree, to stdout.
+ *
+ * - Alternation prints its two children separated by "|".
+ * - Sequence prints its two children back to back.
+ * - Repetition prints its child followed by a quantifier, except that a
+ *   {1,1} repetition prints the child alone.
+ * - Literal prints its text with regex metacharacters escaped.
+ * - The character-class atoms print a bracket expression; "everything"
+ *   prints ".".
+ *
+ * Children are printed through print_child_atom(), which adds parentheses
+ * where needed.
+ */
+static void print_atom(const struct atom *a)
+{
+    const struct repeat *r;
+
+    switch (a->type) {
+    case ATOM_ALTERNATION:
+        print_child_atom(a->u.children[0], a);
+        printf("|");
+        print_child_atom(a->u.children[1], a);
+        break;
+
+    case ATOM_SEQUENCE:
+        print_child_atom(a->u.children[0], a);
+        print_child_atom(a->u.children[1], a);
+        break;
+
+    case ATOM_REPETITION:
+        r = &a->u.repeat.counts;
+        if (r->min == 1 && r->max == 1) {
+            /* Exactly once: the quantifier would be redundant. */
+            print_atom(a->u.repeat.child);
+        } else {
+            print_child_atom(a->u.repeat.child, a);
+            print_repeat(r);
+        }
+        break;
+
+    case ATOM_LITERAL:
+        print_literal(a->u.literal);
+        break;
+
+    /*
+     * A POSIX character class such as [:alpha:] is only recognised inside
+     * a bracket expression. Written bare, "[:alpha:]" is itself a bracket
+     * expression matching one of ':', 'a', 'l', 'p', 'h'. Hence the
+     * doubled brackets.
+     */
+    case ATOM_ALPHABETIC:  printf("[[:alpha:]]"); break;
+    case ATOM_NUMERIC:     printf("[[:digit:]]"); break;
+    case ATOM_UPPERCASE:   printf("[[:upper:]]"); break;
+    case ATOM_LOWERCASE:   printf("[[:lower:]]"); break;
+    case ATOM_CONTROL:     printf("[[:cntrl:]]"); break;
+    case ATOM_PUNCTUATION: printf("[[:punct:]]"); break;
+    case ATOM_EVERYTHING:  printf("."); break;
+
+    default:
+        /* Unknown atom type: print nothing, as before. */
+        break;
+    }
+}
+
+/*
+ * Write the complete regex for the tree rooted at `a` to stdout as a single
+ * line: a leading "^", the expression itself, then "$" and a newline.
+ */
+void print_regex(const struct atom *a)
+{
+    fputs("^", stdout);
+    print_atom(a);
+    fputs("$\n", stdout);
+}
+