diff options
author | Bobby Bingham <koorogi@koorogi.info> | 2017-07-23 23:07:17 -0500 |
---|---|---|
committer | Bobby Bingham <koorogi@koorogi.info> | 2017-07-24 21:19:10 -0500 |
commit | 4459545e6528c8f072de79b799b9ae9b85d01dfb (patch) | |
tree | 03e144fabc0e830662e40566a2eb7f98eafdf3a8 | |
parent | c8a0b7157d544f2359f2373160dcc69cdef8f4de (diff) |
Output regex instead of debugging format
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | ast.h | 4 | ||||
-rw-r--r-- | pattern2regex.c | 2 | ||||
-rw-r--r-- | regex.c | 140 |
5 files changed, 147 insertions, 2 deletions
@@ -1,3 +1,4 @@ +pattern2regex *.o lex-*.c lex-*.h @@ -25,7 +25,7 @@ parse-%.c parse-%.h: parse-%.y $(CC) $(CFLAGS) -c $< -o $@ pattern2regex.o: lex-pattern.h parse-pattern.h -pattern2regex: pattern2regex.o lex-pattern.o parse-pattern.o ast.o +pattern2regex: pattern2regex.o lex-pattern.o parse-pattern.o ast.o regex.o $(TARGETS): $(CC) $(CFLAGS) $^ -o $@ @@ -17,6 +17,8 @@ enum { ATOM_PUNCTUATION, ATOM_EVERYTHING, ATOM_LITERAL, + + ATOM_MAX }; struct atom { @@ -34,6 +36,8 @@ struct atom { struct atom *mkatom(const struct atom *src); void dump_atom(const struct atom *a); +void print_regex(const struct atom *a); + extern struct atom *ast; #endif diff --git a/pattern2regex.c b/pattern2regex.c index 669206a..646b007 100644 --- a/pattern2regex.c +++ b/pattern2regex.c @@ -12,7 +12,7 @@ int main(int argc, char **argv) yyin = stdin; } - if (!yyparse()) dump_atom(ast); + if (!yyparse()) print_regex(ast); fclose(yyin); return 0; @@ -0,0 +1,140 @@ +#include <stdio.h> +#include <string.h> + +#include "ast.h" + +static const int precedence[ATOM_MAX] = { + [ATOM_ALPHABETIC] = 1, + [ATOM_NUMERIC] = 1, + [ATOM_UPPERCASE] = 1, + [ATOM_LOWERCASE] = 1, + [ATOM_CONTROL] = 1, + [ATOM_PUNCTUATION] = 1, + [ATOM_EVERYTHING] = 1, + [ATOM_REPETITION] = 2, + [ATOM_SEQUENCE] = 3, + [ATOM_LITERAL] = 3, + [ATOM_ALTERNATION] = 4, +}; + +static void print_repeat(const struct repeat *r) +{ + if (r->max < 0) { + switch (r->min) { + case 0: printf("*"); break; + case 1: printf("+"); break; + default: printf("{%ld,}", r->min); break; + } + } else if (r->min == 0 && r->max == 1) { + printf("?"); + } else if (r->min == r->max) { + printf("{%ld}", r->min); + } else { + printf("{%ld,%ld}", r->min, r->max); + } +} + +static void print_literal(const char *s) +{ + for (; *s; s++) { + switch (*s) { + case '.': + case '[': + case '{': + case '}': + case '(': + case ')': + case '\\': + case '*': + case '+': + case '?': + case '|': + case '^': + case '$': + putchar('\\'); + + default: + putchar(*s); + } + } +} + +static int get_precedence(const struct atom *a) +{ + const struct repeat *r; + + switch (a->type) { + case ATOM_LITERAL: + switch (strlen(a->u.literal)) { + case 0: return -1; + case 1: return precedence[ATOM_EVERYTHING]; + default: return precedence[ATOM_SEQUENCE]; + } + + case ATOM_REPETITION: + r = &a->u.repeat.counts; + if (r->min == 1 && r->max == 1) + return get_precedence(a->u.repeat.child); + break; + } + + return precedence[a->type]; +} + +static void print_atom(const struct atom *a); + +static void print_child_atom(const struct atom *child, const struct atom *parent) +{ + int group = get_precedence(child) > get_precedence(parent); + if (group) printf("("); + print_atom(child); + if (group) printf(")"); +} + +static void print_atom(const struct atom *a) +{ + const struct repeat *r; + + switch (a->type) { + case ATOM_ALTERNATION: + print_child_atom(a->u.children[0], a); + printf("|"); + print_child_atom(a->u.children[1], a); + break; + + case ATOM_SEQUENCE: + print_child_atom(a->u.children[0], a); + print_child_atom(a->u.children[1], a); + break; + + case ATOM_REPETITION: + r = &a->u.repeat.counts; + if (r->min == 1 && r->max == 1) { + print_atom(a->u.repeat.child); + } else { + print_child_atom(a->u.repeat.child, a); + print_repeat(r); + } + break; + + case ATOM_LITERAL: + print_literal(a->u.literal); + break; + + case ATOM_ALPHABETIC: printf("[:alpha:]"); break; + case ATOM_NUMERIC: printf("[:digit:]"); break; + case ATOM_UPPERCASE: printf("[:upper:]"); break; + case ATOM_LOWERCASE: printf("[:lower:]"); break; + case ATOM_CONTROL: printf("[:cntrl:]"); break; + case ATOM_PUNCTUATION: printf("[:punct:]"); break; + case ATOM_EVERYTHING: printf("."); break; + } +} + +void print_regex(const struct atom *a) +{ + printf("^"); + print_atom(a); + printf("$\n"); +} + |