summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Bobby Bingham <koorogi@koorogi.info> 2017-07-25 21:08:44 -0500
committer Bobby Bingham <koorogi@koorogi.info> 2017-07-25 21:09:45 -0500
commit 0e96b8551b5293ffeadfebc31b85a165b0a74b99 (patch)
tree 1057a6bc62f64b9172d3d73a2d4a6cf422cc2f7e
parent ee78102672afdede839489fa0b1932b64335eaaf (diff)
create general regex simplification pass (HEAD, master)
Move the previous special-case logic for removing 1-1 repeats into this pass.
-rw-r--r-- Makefile 2
-rw-r--r-- ast.h 4
-rw-r--r-- parse-pattern.y 8
-rw-r--r-- pattern2regex.c 5
-rw-r--r-- regex.c 35
5 files changed, 44 insertions, 10 deletions
diff --git a/Makefile b/Makefile
index 79099ba..b061629 100644
--- a/Makefile
+++ b/Makefile
@@ -25,7 +25,7 @@ parse-%.c parse-%.h: parse-%.y
$(CC) $(CFLAGS) -c $< -o $@
pattern2regex.o: lex-pattern.h parse-pattern.h
-pattern2regex: pattern2regex.o lex-pattern.o parse-pattern.o ast.o print-regex.o
+pattern2regex: pattern2regex.o lex-pattern.o parse-pattern.o ast.o print-regex.o regex.o
$(TARGETS):
$(CC) $(CFLAGS) $^ -o $@
diff --git a/ast.h b/ast.h
index f8183f9..dac6e8a 100644
--- a/ast.h
+++ b/ast.h
@@ -28,7 +28,7 @@ struct atom {
struct repeat counts;
struct atom *child;
} repeat;
- const struct atom *children[2];
+ struct atom *children[2];
const char *literal;
} u;
};
@@ -38,6 +38,8 @@ void dump_atom(const struct atom *a);
void print_regex(const struct atom *a);
+void simplify(struct atom *a);
+
extern struct atom *ast;
#endif
diff --git a/parse-pattern.y b/parse-pattern.y
index 2c0556a..e5acd3c 100644
--- a/parse-pattern.y
+++ b/parse-pattern.y
@@ -62,13 +62,7 @@ input:
;
molecule:
- repeat sequence {
- if ($1.min == 1 && $1.max == 1) {
- $$ = $2;
- } else {
- $$ = mkatom(&(struct atom) { .type = ATOM_REPETITION, .u = { .repeat = { .counts = $1, .child = $2 } } });
- }
- }
+ repeat sequence { $$ = mkatom(&(struct atom) { .type = ATOM_REPETITION, .u = { .repeat = { .counts = $1, .child = $2 } } }); }
;
repeat:
diff --git a/pattern2regex.c b/pattern2regex.c
index 646b007..c9b1743 100644
--- a/pattern2regex.c
+++ b/pattern2regex.c
@@ -12,7 +12,10 @@ int main(int argc, char **argv)
yyin = stdin;
}
- if (!yyparse()) print_regex(ast);
+ if (!yyparse()) {
+ simplify(ast);
+ print_regex(ast);
+ }
fclose(yyin);
return 0;
diff --git a/regex.c b/regex.c
new file mode 100644
index 0000000..63bd748
--- /dev/null
+++ b/regex.c
@@ -0,0 +1,35 @@
+#include <stdlib.h>
+
+#include "ast.h"
+
+static void repeat(struct atom *a) /* Simplify the repetition atom `a` in place; a repeat with min == max == 1 is replaced by its child. */
+{
+ struct atom *child = a->u.repeat.child;
+ simplify(child); /* simplify the repeated operand first, so any collapse below copies an already-simplified node */
+
+ const struct repeat *r = &a->u.repeat.counts; /* points into *a, so it is only read before *a is overwritten */
+ if (r->min == 1 && r->max == 1) { /* repeating exactly once adds nothing: drop the wrapper */
+ *a = *child; /* struct copy: `a` takes over the child's type and payload, including any pointers it holds */
+ free(child); /* releases only the child node itself; NOTE(review): assumes the node came from malloc (presumably via mkatom) - confirm */
+ }
+}
+
+static void children(struct atom *a) /* Simplify both operands of a two-child atom; simplify() calls this for ATOM_ALTERNATION and ATOM_SEQUENCE. */
+{
+ simplify(a->u.children[0]); /* both slots are passed on unconditionally; simplify() dereferences its argument, so neither may be NULL */
+ simplify(a->u.children[1]);
+}
+
+void simplify(struct atom *a) /* Recursively simplify the regex AST rooted at `a`, modifying nodes in place; `a` must be non-NULL (a->type is read unconditionally). */
+{
+ switch (a->type) { /* no default case: atom types not listed here are left untouched */
+ case ATOM_REPETITION:
+ repeat(a); /* may overwrite *a with its child when the repeat counts are exactly 1..1 */
+ break;
+
+ case ATOM_ALTERNATION:
+ case ATOM_SEQUENCE:
+ children(a); /* nothing to rewrite at this node itself; just recurse into both operands */
+ }
+}
+