-
Notifications
You must be signed in to change notification settings - Fork 1k
/
os_pcre2_compile.c
167 lines (143 loc) · 5.02 KB
/
os_pcre2_compile.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
/* Copyright (C) 2009 Trend Micro Inc.
* All rights reserved.
*
* This program is a free software; you can redistribute it
* and/or modify it under the terms of the GNU General Public
* License (version 2) as published by the FSF - Free Software
* Foundation
*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "os_regex.h"
/*
 * Forward declarations of the matcher callbacks that OSPcre2_Compile() stores
 * in reg->exec_function. Their definitions are not in this file.
 *
 * Selection performed by OSPcre2_Compile():
 *   - pcre2_match            : general case, pattern compiled with PCRE2
 *   - strcmp / strcasecmp    : literal pattern anchored with both '^' and '$'
 *   - strncmp / strncasecmp  : literal pattern anchored with '^' only
 *   - strrcmp / strrcasecmp  : literal pattern anchored with '$' only
 * The "case" variants are chosen when PCRE2_CASELESS is set in the flags.
 *
 * NOTE(review): the meaning of the returned pointer is defined by the
 * implementations elsewhere - confirm before relying on it.
 */
const char *OSPcre2_Execute_pcre2_match(const char *str, OSPcre2 *reg);
const char *OSPcre2_Execute_strncmp(const char *subject, OSPcre2 *reg);
const char *OSPcre2_Execute_strrcmp(const char *subject, OSPcre2 *reg);
const char *OSPcre2_Execute_strcasecmp(const char *subject, OSPcre2 *reg);
const char *OSPcre2_Execute_strncasecmp(const char *subject, OSPcre2 *reg);
const char *OSPcre2_Execute_strrcasecmp(const char *subject, OSPcre2 *reg);
const char *OSPcre2_Execute_strcmp(const char *subject, OSPcre2 *reg);
/* Returns non-zero when the pattern has the "optionally anchored literal"
 * shape that OSPcre2_Compile() can serve without PCRE2 (defined below). */
int OSPcre2_CouldBeOptimized(const char *pattern);
/**
 * Compile a pattern into an OSPcre2 handle.
 *
 * Patterns accepted by OSPcre2_CouldBeOptimized() that carry a leading '^'
 * and/or a trailing '$' are not handed to PCRE2: the anchors are stripped,
 * the remaining literal is stored in reg->pattern and a plain string
 * comparison callback is installed in reg->exec_function. On that fast path
 * only the PCRE2_CASELESS bit of `flags` is consulted.
 *
 * Every other pattern is compiled with pcre2_compile(); match data and a
 * NULL-terminated sub_strings array (capture count + 1 slots) are allocated.
 *
 * @param pattern  NUL-terminated pattern; NULL is rejected.
 * @param reg      Handle to initialise; NULL makes the function return 0.
 * @param flags    PCRE2 option bits passed to pcre2_compile().
 * @return 1 on success, 0 on failure. On failure (with a non-NULL reg)
 *         reg->error holds an OS_REGEX_* code and OSPcre2_FreePattern(reg)
 *         has been called.
 */
int OSPcre2_Compile(const char *pattern, OSPcre2 *reg, int flags)
{
    int error = 0;
    PCRE2_SIZE erroroffset = 0;
    size_t pattern_len = 0UL;
    uint32_t count = 0;

    /* Check for references not initialized */
    if (reg == NULL) {
        return (0);
    }

    /* Initialize OSPcre2 structure */
    reg->error = 0;
    reg->sub_strings = NULL;
    reg->regex = NULL;
    reg->match_data = NULL;
    reg->pattern_len = 0UL;
    reg->pattern = NULL;
    reg->exec_function = NULL;

    /* The pattern can't be null */
    if (pattern == NULL) {
        reg->error = OS_REGEX_PATTERN_NULL;
        goto compile_error;
    }

    pattern_len = strlen(pattern);

    /* Fast path for anchored literals. An empty pattern has no last character
     * to inspect, so it is sent straight to PCRE2 instead of indexing
     * pattern[-1]. */
    if (pattern_len > 0 && OSPcre2_CouldBeOptimized(pattern)) {
        const int anchored_start = (pattern[0] == '^');
        const int anchored_end = (pattern[pattern_len - 1] == '$');
        const int caseless = (flags & PCRE2_CASELESS) != 0;

        if (anchored_start || anchored_end) {
            /* A single character cannot be both '^' and '$', so the
             * subtraction below cannot underflow. */
            const char *literal = anchored_start ? pattern + 1 : pattern;
            const size_t literal_len = pattern_len
                                       - (anchored_start ? 1U : 0U)
                                       - (anchored_end ? 1U : 0U);

            reg->pattern = strdup(literal);
            if (reg->pattern == NULL) {
                reg->error = OS_REGEX_OUTOFMEMORY;
                goto compile_error;
            }

            /* Drop the trailing '$' (no-op when there is none). */
            reg->pattern[literal_len] = '\0';
            reg->pattern_len = literal_len;

            if (anchored_start && anchored_end) {
                reg->exec_function = caseless ? OSPcre2_Execute_strcasecmp
                                              : OSPcre2_Execute_strcmp;
            } else if (anchored_start) {
                reg->exec_function = caseless ? OSPcre2_Execute_strncasecmp
                                              : OSPcre2_Execute_strncmp;
            } else {
                reg->exec_function = caseless ? OSPcre2_Execute_strrcasecmp
                                              : OSPcre2_Execute_strrcmp;
            }
            return (1);
        }
    }

    /* General case: full PCRE2 compilation */
    reg->exec_function = OSPcre2_Execute_pcre2_match;
    reg->regex = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, (uint32_t)flags,
                               &error, &erroroffset, NULL);
    if (reg->regex == NULL) {
        reg->error = OS_REGEX_BADREGEX;
        goto compile_error;
    }

    reg->match_data = pcre2_match_data_create_from_pattern(reg->regex, NULL);
    if (reg->match_data == NULL) {
        reg->error = OS_REGEX_OUTOFMEMORY;
        goto compile_error;
    }

    /* Without a valid capture count the sub_strings array cannot be sized. */
    if (pcre2_pattern_info(reg->regex, PCRE2_INFO_CAPTURECOUNT, (void *)&count) != 0) {
        reg->error = OS_REGEX_BADREGEX;
        goto compile_error;
    }
    count++; /* to store NULL pointer at the end */

    /* calloc zero-fills, so every slot starts out as NULL */
    reg->sub_strings = calloc(count, sizeof(char *));
    if (reg->sub_strings == NULL) {
        reg->error = OS_REGEX_OUTOFMEMORY;
        goto compile_error;
    }

#ifdef USE_PCRE2_JIT
    /* Just In Time compilation for faster execution */
    if (pcre2_jit_compile(reg->regex, PCRE2_JIT_COMPLETE) != 0) {
        reg->error = OS_REGEX_NO_JIT;
        goto compile_error;
    }
#endif

    return (1);

compile_error:
    /* Error handling */
    OSPcre2_FreePattern(reg);
    return (0);
}
/**
 * Tell whether a pattern is a plain literal, optionally prefixed with '^'
 * and/or suffixed with '$', made only of characters from a fixed safe set
 * (letters, digits, space and a handful of punctuation marks).
 *
 * The check is done by compiling a small helper regex on every call and
 * matching `pattern` against it.
 *
 * @param pattern  NUL-terminated pattern to inspect; NULL yields 0.
 * @return 1 when the pattern matches the helper regex, 0 otherwise,
 *         including when the helper regex or its match data cannot be
 *         created.
 */
int OSPcre2_CouldBeOptimized(const char *pattern)
{
    pcre2_code *preg = NULL;
    pcre2_match_data *md = NULL;
    PCRE2_SPTR re = (PCRE2_SPTR) "^\\^?[a-zA-Z0-9 !\"#%&',/:;<=>@_`~-]*\\$?$";
    int error = 0;
    PCRE2_SIZE erroroffset = 0;
    int should_be_optimized = 0;

    /* strlen(NULL) is undefined; a missing pattern is simply not optimizable */
    if (pattern == NULL) {
        return 0;
    }

    preg = pcre2_compile(re, PCRE2_ZERO_TERMINATED, PCRE2_UTF | PCRE2_NO_UTF_CHECK, &error,
                         &erroroffset, NULL);
    if (preg == NULL) {
        /* The match-data constructor must not be handed a NULL code pointer */
        return 0;
    }

    md = pcre2_match_data_create_from_pattern(preg, NULL);
    if (md != NULL) {
        if (pcre2_match(preg, (PCRE2_SPTR)pattern, strlen(pattern), 0, 0, md, NULL) >= 0) {
            should_be_optimized = 1;
        }
        pcre2_match_data_free(md);
    }

    pcre2_code_free(preg);
    return should_be_optimized;
}