summaryrefslogtreecommitdiff
path: root/regex.c
blob: 02a58fad02dcda33772f993435db27437ce17ca6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#include <stdio.h>
#include <string.h>

#include "ast.h"

static const int precedence[ATOM_MAX] = {
	[ATOM_ALPHABETIC]  = 1,
	[ATOM_NUMERIC]     = 1,
	[ATOM_UPPERCASE]   = 1,
	[ATOM_LOWERCASE]   = 1,
	[ATOM_CONTROL]     = 1,
	[ATOM_PUNCTUATION] = 1,
	[ATOM_EVERYTHING]  = 1,
	[ATOM_REPETITION]  = 2,
	[ATOM_SEQUENCE]    = 3,
	[ATOM_LITERAL]     = 3,
	[ATOM_ALTERNATION] = 4,
};

static void print_repeat(const struct repeat *r)
{
	if (r->max < 0) {
		switch (r->min) {
		case 0:  printf("*"); break;
		case 1:  printf("+"); break;
		default: printf("{%ld,}", r->min); break;
		}
	} else if (r->min == 0 && r->max == 1) {
		printf("?");
	} else if (r->min == r->max) {
		printf("{%ld}", r->min);
	} else {
		printf("{%ld,%ld}", r->min, r->max);
	}
}

static void print_literal(const char *s)
{
	for (; *s; s++) {
		switch (*s) {
		case '.':
		case '[':
		case '{':
		case '}':
		case '(':
		case ')':
		case '\\':
		case '*':
		case '+':
		case '?':
		case '|':
		case '^':
		case '$':
			putchar('\\');

		default:
			putchar(*s);
		}
	}
}

static int get_precedence(const struct atom *a)
{
	if (a->type == ATOM_LITERAL) {
		switch (strlen(a->u.literal)) {
		case 0:  return -1;
		case 1:  return precedence[ATOM_EVERYTHING];
		default: return precedence[ATOM_SEQUENCE];
		}
	}

	return precedence[a->type];
}

static void print_atom(const struct atom *a);

static void print_child_atom(const struct atom *child, const struct atom *parent)
{
	int group = get_precedence(child) > get_precedence(parent);
	if (group) printf("(");
	print_atom(child);
	if (group) printf(")");
}

static void print_atom(const struct atom *a)
{
	switch (a->type) {
	case ATOM_ALTERNATION:
		print_child_atom(a->u.children[0], a);
		printf("|");
		print_child_atom(a->u.children[1], a);
		break;

	case ATOM_SEQUENCE:
		print_child_atom(a->u.children[0], a);
		print_child_atom(a->u.children[1], a);
		break;

	case ATOM_REPETITION:
		print_child_atom(a->u.repeat.child, a);
		print_repeat(&a->u.repeat.counts);
		break;

	case ATOM_LITERAL:
		print_literal(a->u.literal);
		break;

	case ATOM_ALPHABETIC:  printf("[:alpha:]"); break;
	case ATOM_NUMERIC:     printf("[:digit:]"); break;
	case ATOM_UPPERCASE:   printf("[:upper:]"); break;
	case ATOM_LOWERCASE:   printf("[:lower:]"); break;
	case ATOM_CONTROL:     printf("[:cntrl:]"); break;
	case ATOM_PUNCTUATION: printf("[:punct:]"); break;
	case ATOM_EVERYTHING:  printf(".");         break;
	}
}

void print_regex(const struct atom *a)
{
	printf("^");
	print_atom(a);
	printf("$\n");
}