From 0e96b8551b5293ffeadfebc31b85a165b0a74b99 Mon Sep 17 00:00:00 2001
From: Bobby Bingham
Date: Tue, 25 Jul 2017 21:08:44 -0500
Subject: create general regex simplification pass

Move the previous special-case logic for removing 1-1 repeats into this pass.
---
Review note: regex.c below was revised during review (NULL guards, explicit
switch default, comments), so its index hash predates these changes.

 Makefile        |  2 +-
 ast.h           |  4 +++-
 parse-pattern.y |  8 +-------
 pattern2regex.c |  5 ++++-
 regex.c         | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 56 insertions(+), 10 deletions(-)
 create mode 100644 regex.c

diff --git a/Makefile b/Makefile
index 79099ba..b061629 100644
--- a/Makefile
+++ b/Makefile
@@ -25,7 +25,7 @@ parse-%.c parse-%.h: parse-%.y
 	$(CC) $(CFLAGS) -c $< -o $@
 
 pattern2regex.o: lex-pattern.h parse-pattern.h
-pattern2regex: pattern2regex.o lex-pattern.o parse-pattern.o ast.o print-regex.o
+pattern2regex: pattern2regex.o lex-pattern.o parse-pattern.o ast.o print-regex.o regex.o
 
 $(TARGETS):
 	$(CC) $(CFLAGS) $^ -o $@
diff --git a/ast.h b/ast.h
index f8183f9..dac6e8a 100644
--- a/ast.h
+++ b/ast.h
@@ -28,7 +28,7 @@ struct atom {
             struct repeat counts;
             struct atom *child;
         } repeat;
-        const struct atom *children[2];
+        struct atom *children[2];
         const char *literal;
     } u;
 };
@@ -38,6 +38,8 @@ void dump_atom(const struct atom *a);
 
 void print_regex(const struct atom *a);
 
+void simplify(struct atom *a);
+
 extern struct atom *ast;
 
 #endif
diff --git a/parse-pattern.y b/parse-pattern.y
index 2c0556a..e5acd3c 100644
--- a/parse-pattern.y
+++ b/parse-pattern.y
@@ -62,13 +62,7 @@ input:
     ;
 
 molecule:
-    repeat sequence {
-        if ($1.min == 1 && $1.max == 1) {
-            $$ = $2;
-        } else {
-            $$ = mkatom(&(struct atom) { .type = ATOM_REPETITION, .u = { .repeat = { .counts = $1, .child = $2 } } });
-        }
-    }
+    repeat sequence { $$ = mkatom(&(struct atom) { .type = ATOM_REPETITION, .u = { .repeat = { .counts = $1, .child = $2 } } }); }
     ;
 
 repeat:
diff --git a/pattern2regex.c b/pattern2regex.c
index 646b007..c9b1743 100644
--- a/pattern2regex.c
+++ b/pattern2regex.c
@@ -12,7 +12,10 @@ int main(int argc, char **argv)
         yyin = stdin;
     }
 
-    if (!yyparse()) print_regex(ast);
+    if (!yyparse()) {
+        simplify(ast);
+        print_regex(ast);
+    }
     fclose(yyin);
 
     return 0;
diff --git a/regex.c b/regex.c
new file mode 100644
index 0000000..63bd748
--- /dev/null
+++ b/regex.c
@@ -0,0 +1,47 @@
+#include <stdlib.h>
+
+#include "ast.h"
+
+/*
+ * Simplify a repetition in place: the child is simplified first, and a
+ * repetition whose min and max are both 1 is replaced by its child.
+ */
+static void repeat(struct atom *a)
+{
+    struct atom *child = a->u.repeat.child;
+    simplify(child);
+
+    const struct repeat *r = &a->u.repeat.counts;
+    if (child && r->min == 1 && r->max == 1) {
+        /* copy the child over this node, then free the child's storage */
+        *a = *child;
+        free(child);
+    }
+}
+
+/* Simplify both operands of an alternation or sequence. */
+static void children(struct atom *a)
+{
+    simplify(a->u.children[0]);
+    simplify(a->u.children[1]);
+}
+
+/* Simplify the tree rooted at a in place; a NULL node is ignored. */
+void simplify(struct atom *a)
+{
+    if (!a) {
+        return;
+    }
+
+    switch (a->type) {
+    case ATOM_REPETITION:
+        repeat(a);
+        break;
+    case ATOM_ALTERNATION:
+    case ATOM_SEQUENCE:
+        children(a);
+        break;
+    default: /* every other atom type is left unchanged */
+        break;
+    }
+}
-- 
cgit v1.2.3