summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--Makefile2
-rw-r--r--ast.h4
-rw-r--r--pattern2regex.c2
-rw-r--r--regex.c140
5 files changed, 147 insertions, 2 deletions
diff --git a/.gitignore b/.gitignore
index 7677c59..ddcf82a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+pattern2regex
*.o
lex-*.c
lex-*.h
diff --git a/Makefile b/Makefile
index e62c90c..91c36ad 100644
--- a/Makefile
+++ b/Makefile
@@ -25,7 +25,7 @@ parse-%.c parse-%.h: parse-%.y
$(CC) $(CFLAGS) -c $< -o $@
pattern2regex.o: lex-pattern.h parse-pattern.h
-pattern2regex: pattern2regex.o lex-pattern.o parse-pattern.o ast.o
+pattern2regex: pattern2regex.o lex-pattern.o parse-pattern.o ast.o regex.o
$(TARGETS):
$(CC) $(CFLAGS) $^ -o $@
diff --git a/ast.h b/ast.h
index 4694c33..f8183f9 100644
--- a/ast.h
+++ b/ast.h
@@ -17,6 +17,8 @@ enum {
ATOM_PUNCTUATION,
ATOM_EVERYTHING,
ATOM_LITERAL,
+
+ ATOM_MAX
};
struct atom {
@@ -34,6 +36,8 @@ struct atom {
struct atom *mkatom(const struct atom *src);
void dump_atom(const struct atom *a);
+void print_regex(const struct atom *a);
+
extern struct atom *ast;
#endif
diff --git a/pattern2regex.c b/pattern2regex.c
index 669206a..646b007 100644
--- a/pattern2regex.c
+++ b/pattern2regex.c
@@ -12,7 +12,7 @@ int main(int argc, char **argv)
yyin = stdin;
}
- if (!yyparse()) dump_atom(ast);
+ if (!yyparse()) print_regex(ast);
fclose(yyin);
return 0;
diff --git a/regex.c b/regex.c
new file mode 100644
index 0000000..154a5df
--- /dev/null
+++ b/regex.c
@@ -0,0 +1,140 @@
+#include <stdio.h>
+#include <string.h>
+
+#include "ast.h"
+
+/*
+ * Binding strength of every atom type when rendered as a regex, indexed by
+ * atom type.  A smaller value binds more tightly; an operand is grouped
+ * when its value exceeds that of the construct it is placed in.  Types
+ * without an entry are zero-initialized.
+ */
+static const int precedence[ATOM_MAX] = {
+	/* Loosest: alternation. */
+	[ATOM_ALTERNATION] = 4,
+
+	/* Concatenation, including multi-character literals. */
+	[ATOM_SEQUENCE]    = 3,
+	[ATOM_LITERAL]     = 3,
+
+	/* Repetition operators apply to a single operand. */
+	[ATOM_REPETITION]  = 2,
+
+	/* Tightest: single-character matchers. */
+	[ATOM_EVERYTHING]  = 1,
+	[ATOM_PUNCTUATION] = 1,
+	[ATOM_CONTROL]     = 1,
+	[ATOM_LOWERCASE]   = 1,
+	[ATOM_UPPERCASE]   = 1,
+	[ATOM_NUMERIC]     = 1,
+	[ATOM_ALPHABETIC]  = 1,
+};
+
+/*
+ * Write the repetition operator for the counts in `r` to stdout.
+ *
+ * A negative `max` is treated as "no upper bound".  The shorthand forms
+ * `*`, `+` and `?` are used where they apply; everything else is written
+ * as an interval expression.
+ */
+static void print_repeat(const struct repeat *r)
+{
+	const int unbounded = r->max < 0;
+
+	if (unbounded && r->min == 0) {
+		printf("*");
+		return;
+	}
+	if (unbounded && r->min == 1) {
+		printf("+");
+		return;
+	}
+	if (unbounded) {
+		printf("{%ld,}", r->min);
+		return;
+	}
+
+	/* From here on the upper bound is finite. */
+	if (r->min == 0 && r->max == 1) {
+		printf("?");
+		return;
+	}
+	if (r->min == r->max) {
+		printf("{%ld}", r->min);
+		return;
+	}
+	printf("{%ld,%ld}", r->min, r->max);
+}
+
+/*
+ * Write the string `s` to stdout as a regex literal: every character that
+ * is a regex metacharacter is preceded by a backslash, all other characters
+ * are copied unchanged.
+ */
+static void print_literal(const char *s)
+{
+	/* Characters that must be escaped to be matched literally. */
+	static const char metachars[] = ".[{}()\\*+?|^$";
+	const char *p = s;
+
+	while (*p != '\0') {
+		if (strchr(metachars, *p) != NULL) {
+			putchar('\\');
+		}
+		putchar(*p);
+		p++;
+	}
+}
+
+/*
+ * Return the effective precedence of atom `a` as it will be printed.
+ *
+ * - A repetition of exactly one is printed as its child alone, so it takes
+ *   on the child's precedence.
+ * - An empty literal yields -1, a one-character literal ranks with the
+ *   single-character matchers, and a longer literal ranks as a sequence.
+ * - Everything else is looked up in the precedence table.
+ */
+static int get_precedence(const struct atom *a)
+{
+	size_t len;
+
+	/* Look through any chain of {1,1} repetitions to the real operand. */
+	while (a->type == ATOM_REPETITION
+	       && a->u.repeat.counts.min == 1
+	       && a->u.repeat.counts.max == 1) {
+		a = a->u.repeat.child;
+	}
+
+	if (a->type != ATOM_LITERAL) {
+		return precedence[a->type];
+	}
+
+	len = strlen(a->u.literal);
+	if (len == 0) {
+		return -1;
+	}
+	if (len == 1) {
+		return precedence[ATOM_EVERYTHING];
+	}
+	return precedence[ATOM_SEQUENCE];
+}
+
+static void print_atom(const struct atom *a);
+
+/*
+ * Print `child` as an operand of `parent`, wrapping it in parentheses when
+ * printing it bare would change how the regex is parsed.
+ *
+ * Grouping is needed when the child binds more loosely than the parent
+ * (larger precedence value), and also when a repetition is applied directly
+ * to another repetition.
+ */
+static void print_child_atom(const struct atom *child, const struct atom *parent)
+{
+	const int child_prec = get_precedence(child);
+	const int parent_prec = get_precedence(parent);
+	int group = child_prec > parent_prec;
+
+	/*
+	 * Two repetition operators in a row (e.g. "a{3}{2}" or "a*+") are
+	 * undefined in POSIX regular expressions and mean something else
+	 * (possessive matching) in other dialects, so a repeated repetition
+	 * must be grouped even though both have the same precedence.
+	 */
+	if (child_prec == parent_prec
+	    && child_prec == precedence[ATOM_REPETITION]) {
+		group = 1;
+	}
+
+	if (group) {
+		printf("(");
+	}
+	print_atom(child);
+	if (group) {
+		printf(")");
+	}
+}
+
+/*
+ * Write the regex text for atom `a` to stdout, recursing into its children.
+ *
+ * No grouping is applied to `a` itself; print_child_atom() decides whether
+ * each operand needs parentheses.
+ */
+static void print_atom(const struct atom *a)
+{
+	const struct repeat *r;
+
+	switch (a->type) {
+	case ATOM_ALTERNATION:
+		print_child_atom(a->u.children[0], a);
+		printf("|");
+		print_child_atom(a->u.children[1], a);
+		break;
+
+	case ATOM_SEQUENCE:
+		print_child_atom(a->u.children[0], a);
+		print_child_atom(a->u.children[1], a);
+		break;
+
+	case ATOM_REPETITION:
+		r = &a->u.repeat.counts;
+		if (r->min == 1 && r->max == 1) {
+			/* Exactly once: the operator would be redundant. */
+			print_atom(a->u.repeat.child);
+		} else {
+			print_child_atom(a->u.repeat.child, a);
+			print_repeat(r);
+		}
+		break;
+
+	case ATOM_LITERAL:
+		print_literal(a->u.literal);
+		break;
+
+	/*
+	 * A character class name such as [:alpha:] is only recognised inside
+	 * a bracket expression; on its own it would be a bracket expression
+	 * matching the characters ':', 'a', 'l', 'p' and 'h'.  Hence the
+	 * doubled brackets.
+	 */
+	case ATOM_ALPHABETIC:  printf("[[:alpha:]]"); break;
+	case ATOM_NUMERIC:     printf("[[:digit:]]"); break;
+	case ATOM_UPPERCASE:   printf("[[:upper:]]"); break;
+	case ATOM_LOWERCASE:   printf("[[:lower:]]"); break;
+	case ATOM_CONTROL:     printf("[[:cntrl:]]"); break;
+	case ATOM_PUNCTUATION: printf("[[:punct:]]"); break;
+	case ATOM_EVERYTHING:  printf("."); break;
+
+	default:
+		/* Unknown atom types produce no output. */
+		break;
+	}
+}
+
+/*
+ * Write the atom tree `a` to stdout as a single anchored regex followed by
+ * a newline.
+ *
+ * The expression is enclosed in "^" and "$".  Because anchors bind more
+ * tightly than "|", a top-level alternation is wrapped in a group:
+ * "^a|b$" would otherwise parse as "(^a)|(b$)" and anchor each branch on
+ * one side only.
+ */
+void print_regex(const struct atom *a)
+{
+	const int group = get_precedence(a) == precedence[ATOM_ALTERNATION];
+
+	printf("^");
+	if (group) {
+		printf("(");
+	}
+	print_atom(a);
+	if (group) {
+		printf(")");
+	}
+	printf("$\n");
+}
+