summaryrefslogtreecommitdiff
path: root/print-regex.c
diff options
context:
space:
mode:
Diffstat (limited to 'print-regex.c')
-rw-r--r--print-regex.c124
1 files changed, 124 insertions, 0 deletions
diff --git a/print-regex.c b/print-regex.c
new file mode 100644
index 0000000..02a58fa
--- /dev/null
+++ b/print-regex.c
@@ -0,0 +1,124 @@
+#include <stdio.h>
+#include <string.h>
+
+#include "ast.h"
+
+static const int precedence[ATOM_MAX] = {
+ [ATOM_ALPHABETIC] = 1,
+ [ATOM_NUMERIC] = 1,
+ [ATOM_UPPERCASE] = 1,
+ [ATOM_LOWERCASE] = 1,
+ [ATOM_CONTROL] = 1,
+ [ATOM_PUNCTUATION] = 1,
+ [ATOM_EVERYTHING] = 1,
+ [ATOM_REPETITION] = 2,
+ [ATOM_SEQUENCE] = 3,
+ [ATOM_LITERAL] = 3,
+ [ATOM_ALTERNATION] = 4,
+};
+
+static void print_repeat(const struct repeat *r)
+{
+ if (r->max < 0) {
+ switch (r->min) {
+ case 0: printf("*"); break;
+ case 1: printf("+"); break;
+ default: printf("{%ld,}", r->min); break;
+ }
+ } else if (r->min == 0 && r->max == 1) {
+ printf("?");
+ } else if (r->min == r->max) {
+ printf("{%ld}", r->min);
+ } else {
+ printf("{%ld,%ld}", r->min, r->max);
+ }
+}
+
+static void print_literal(const char *s)
+{
+ for (; *s; s++) {
+ switch (*s) {
+ case '.':
+ case '[':
+ case '{':
+ case '}':
+ case '(':
+ case ')':
+ case '\\':
+ case '*':
+ case '+':
+ case '?':
+ case '|':
+ case '^':
+ case '$':
+ putchar('\\');
+
+ default:
+ putchar(*s);
+ }
+ }
+}
+
+static int get_precedence(const struct atom *a)
+{
+ if (a->type == ATOM_LITERAL) {
+ switch (strlen(a->u.literal)) {
+ case 0: return -1;
+ case 1: return precedence[ATOM_EVERYTHING];
+ default: return precedence[ATOM_SEQUENCE];
+ }
+ }
+
+ return precedence[a->type];
+}
+
+static void print_atom(const struct atom *a);
+
+static void print_child_atom(const struct atom *child, const struct atom *parent)
+{
+ int group = get_precedence(child) > get_precedence(parent);
+ if (group) printf("(");
+ print_atom(child);
+ if (group) printf(")");
+}
+
+static void print_atom(const struct atom *a)
+{
+ switch (a->type) {
+ case ATOM_ALTERNATION:
+ print_child_atom(a->u.children[0], a);
+ printf("|");
+ print_child_atom(a->u.children[1], a);
+ break;
+
+ case ATOM_SEQUENCE:
+ print_child_atom(a->u.children[0], a);
+ print_child_atom(a->u.children[1], a);
+ break;
+
+ case ATOM_REPETITION:
+ print_child_atom(a->u.repeat.child, a);
+ print_repeat(&a->u.repeat.counts);
+ break;
+
+ case ATOM_LITERAL:
+ print_literal(a->u.literal);
+ break;
+
+ case ATOM_ALPHABETIC: printf("[:alpha:]"); break;
+ case ATOM_NUMERIC: printf("[:digit:]"); break;
+ case ATOM_UPPERCASE: printf("[:upper:]"); break;
+ case ATOM_LOWERCASE: printf("[:lower:]"); break;
+ case ATOM_CONTROL: printf("[:cntrl:]"); break;
+ case ATOM_PUNCTUATION: printf("[:punct:]"); break;
+ case ATOM_EVERYTHING: printf("."); break;
+ }
+}
+
+void print_regex(const struct atom *a)
+{
+ printf("^");
+ print_atom(a);
+ printf("$\n");
+}
+