Skip to content

Commit

Permalink
progress
Browse files Browse the repository at this point in the history
  • Loading branch information
GerbenAaltink committed Oct 4, 2024
1 parent 846726b commit 7921929
Show file tree
Hide file tree
Showing 6 changed files with 127 additions and 23 deletions.
10 changes: 6 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
all: rmatch run
all: build_rmatch run build_rmatch_so

build_rmatch:
gcc rmatch.c -Wall -Wextra -Ofast -o rmatch -Werror
build_rmatch_so:
gcc rmatch.so.c -fPIC -shared -o librmatch.so -Ofast -Werror

rmatch: rmatch.c rmatch.h rmatch.so.c
gcc rmatch.c -Wall -Wextra -Ofast -o rmatch
gcc rmatch.so.c -fPIC -shared -o librmatch.so -Ofast

run:
./rmatch
Expand Down
7 changes: 4 additions & 3 deletions bmatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
#include <stdio.h>
#include <assert.h>
int bmatchhere(char *regexp, char *text);
inline int bmatchstar(int c, char *regexp, char *text);
inline int bmatch(char *regexp, char *text)
int bmatchstar(int c, char *regexp, char *text);
int bmatch(char *regexp, char *text)
{
if (regexp[0] == '^')
return bmatchhere(regexp + 1, text) == 0 ? -1 : 0;
Expand All @@ -31,7 +31,8 @@ int bmatchhere(char *regexp, char *text)
return bmatchhere(regexp + 1, text + 1);
return 0;
}
inline int bmatchstar(int c, char *regexp, char *text)

int bmatchstar(int c, char *regexp, char *text)
{
do
{
Expand Down
Binary file modified librmatch.so
Binary file not shown.
115 changes: 101 additions & 14 deletions rmatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ int rmatchhere(rmatch_t * rm,unsigned char *regexp, unsigned char *text);
inline int rmatchgreedy(rmatch_t * rm, int c, unsigned char *regexp, unsigned char *text, int required);
rmatch_t *rmatch(char *regexp, char **txt)
{
rmatch_optimize = 0;
char * compiled = rmatch_optimize ? rmatch_compile(regexp) : NULL;
if(compiled)
regexp = compiled;
Expand Down Expand Up @@ -115,23 +116,45 @@ rmatch_t *rmatch(char *regexp, char **txt)
do
{
steps_total++;
rm.start = steps_total;
rm.position = rm.start;
/* if(!*text){
if(*regexp && *regexp != '$')
{
printf("HIERRS\n");
rm.valid = 0;
}
return &rm;
}*/

if (!(result[2] = rmatchhere(&rm, (unsigned char *)regexp, text)))
continue;

result[2]--;
result[0] = result[2] > 0;
result[1] = steps_total;
*txt += result[1] + result[2];
if(result[0])
result[1] = rm.start;
else
result[1] = 0;
printf("%d\n",rm.length + rm.position);
*txt += result[1] + result[2];
rm.valid = result[0] ? 1 : 0;
rm.position = result[0] ? result[1] + result[2] : 0;
rm.length = result[0] ? result[2] : 0;
rm.start = result[0] ? result[1] : 0;
if(!result[0])
rm.start = 0;

if(compiled)
free(compiled);
return &rm;

} while (*text++ != '\0');
if(compiled)
free(compiled);
rm.valid = 0;
rm.start = 0;
// rm->position = 0;
rm.length = 0;
return &rm;
}

Expand Down Expand Up @@ -184,11 +207,28 @@ int rmatchhere(rmatch_t * rm,unsigned char *regexp, unsigned char *text)

unsigned int res = 0;
rm->position += 1;
rm->length += 1;

if(rm->binary && rm->position == rm->size - 1)
{
return 1;
}
if(!*text){
return regexp[0] == '$' ? 2 : 1;
}
if(regexp[0] == '('){

rm->start = rm->position - 1;
rm->length = 1;
return rmatchhere(rm, regexp+1, text) + 1;;
}
if(regexp[0] == ')'){
regexp++;
//rm->length = rm->position - rm->start + 1;
rm->length = rmatchhere(rm, regexp+1, text) + rm->start + 1;
//return rm->length + rmatchhere(rm, regexp+1, text) + 1 ;
return rm->length + rm->start; // rm->position - rm->start + 1;
}
if (regexp[0] == '\0')
return 1;
if (regexp[0] == '\\' && regexp[1] == 'd'){
Expand Down Expand Up @@ -243,9 +283,9 @@ int rmatchhere(rmatch_t * rm,unsigned char *regexp, unsigned char *text)
}
if (regexp[0] == '$' && regexp[1] == '\0')
return *text == '\0';
//if (regexp[0] == '\\')
// return *text == '.' && rmatchhere(rm, regexp + 2, text + 1) + 1;
if ((*text != '\0' && !rm->binary) && (regexp[0] == '.' || regexp[0] == *text))
if (regexp[0] == '\\' && regexp[1] == '.')
return *text == '.' && rmatchhere(rm, regexp + 2, text + 1) + 1;
if (( ((!rm->binary && *text != '\0' ) && !(rm->binary && rm->position == rm->size) ) && !rm->binary) && (regexp[0] == '.' || regexp[0] == *text))
if ((res = rmatchhere(rm, regexp + 1, text + 1)))
{
rm->position = res + 1;
Expand All @@ -265,17 +305,23 @@ int rmatchgreedy(rmatch_t * rm, int c, unsigned char *regexp, unsigned char *tex
res = rmatch_success + res;
if (!required)
{
rm->position = res;
// rm->length += res;
//rm->position = res;
return res;
}
else if (required && res > 1){
rm->position = res;
//rm->length = res;
//rm->position = res;
return res;
}
} while (*text != '\0' && ((*text++ == c) || c == '.'));
return 0;

} while (*text != '\0' && ((*text++ == c) || c == ')' || c == '(' || c == '.'));
printf("HIERR?");
return rmatch_success;
}



char * rmatch_replace(char *regexpr, char ** txt, char *replacement){
char * txt_ptr = *txt;
rmatch_t * match_info = rmatch(regexpr, &txt_ptr);
Expand All @@ -299,12 +345,20 @@ char * rmatch_replace(char *regexpr, char ** txt, char *replacement){
}
char *rmatch_extract(char *regexp, char **txt)
{

printf("A:%s\n",*txt);
rmatch_t * result = rmatch(regexp, txt);
if (result->valid)
{
printf("A1:start:%d\n",result->start);
printf("A1:position:%d\n",result->position);
printf("A1:length:%d\n",result->length);
char *extracted = *txt - result->length;
char *str = (char *)malloc(result->length + 1);
strncpy(str, extracted, result->length);
printf("B:%s\n", extracted);
*txt = extracted + result->length;
printf("C:%s\n", *txt);
str[result->length] = 0;
return str;
}
Expand Down Expand Up @@ -379,8 +433,28 @@ void rmatch_example_two()
assert(!result);
}
void rmatch_tests_compile(){
printf("<%s>\n",rmatch_compile("a*bb*.*[a][b][0-9a-zA-ZA-B1-2a-b]"));
assert(!strcmp("a*bb*.*ab[\\d\\l\\uA-B1-2a-b]", rmatch_compile("a*bb*.*[a][b][0-9a-zA-ZA-B1-2a-b]")));
// printf("<%s>\n",rmatch_compile("a*bb*.*[a][b][0-9a-zA-ZA-B1-2a-b]"));
// assert(!strcmp("a*bb*.*ab[\\d\\l\\uA-B1-2a-b]", rmatch_compile("a*bb*.*[a][b][0-9a-zA-ZA-B1-2a-b]")));
}


/*
 * Exercises rmatch_extract() by calling it repeatedly on one input string
 * with the pattern src="(.*)".
 *
 * The first two calls are expected to return "script.js" and
 * "some-longer-script-name.js"; the three calls after that are expected to
 * return NULL. The same `str` pointer is handed to every call by address, so
 * each call continues from wherever the previous one left it.
 *
 * Every non-NULL result is released with free().
 */
void rmatch_tests_extract() {
    char *expr = "src=\"(.*)\"";
    char *str = "<script langugage=\"javascript\" src=\"script.js\"src=\"some-longer-script-name.js\" tag-behind=\"value\"></script>";

    char *res = rmatch_extract(expr, &str);
    /* rmatch_extract() may return NULL; passing NULL to strcmp() is
     * undefined behaviour, so fail with a clear assertion first. */
    assert(res != NULL);
    assert(!strcmp(res, "script.js"));
    free(res);

    res = rmatch_extract(expr, &str);
    /* Same guard, placed before the printf: "%s" with NULL is undefined too. */
    assert(res != NULL);
    printf("%s\n", res);
    assert(!strcmp(res, "some-longer-script-name.js"));
    free(res);

    printf("Test with match end. (Edge case)\n");
    /* Once the input is exhausted, every further call must keep returning NULL. */
    res = rmatch_extract(expr, &str);
    assert(res == NULL);
    res = rmatch_extract(expr, &str);
    assert(res == NULL);
    res = rmatch_extract(expr, &str);
    assert(res == NULL);
}
void rmatch_tests_slash(){
rmatch_t * result ;
Expand Down Expand Up @@ -415,7 +489,6 @@ void rmatch_tests_replace(){
strcpy(str, "testxtest");
char * ptr = str;
char * result = rmatch_replace("x", &ptr, "y");
printf("%s\n",result);
assert(!strcmp(result, "testytest"));
assert(!strcmp(str, "testytest"));
assert(!strcmp(ptr,"test"));
Expand Down Expand Up @@ -447,7 +520,15 @@ void rmatch_tests()
rmatch_example_one();
printf("Testing example two.\n");
rmatch_example_two();
printf("Testing extracting.\n");
// Roof
printf("Testing roof.\n");
rmatch_test("testje","testje", (char *[]){NULL});
rmatch_test("^testje","testje", (char *[]){NULL});
// Dollar
printf("Testing dollar\n");
rmatch_test("^testje$","testje", (char *[]){NULL});
rmatch_test("testje$","testjetestje", (char *[]){NULL});

// Star
printf("Testing star.\n");
rmatch_test(".*H.*ry P.*rS.*la", "Harry PotterSim SalaHarry PotterSimSalaHarry PotterSimSalaHarry PotterSimSala",
Expand All @@ -474,8 +555,14 @@ void rmatch_tests()
rmatch_test("T.*e q.*k b.*n f+.*x j.*s o.*r t.*e l[oa][az].*y d[ao]g.", text_fox4, (char *[]){"The quick brown fooox jumps over the lazy dog.", NULL});
rmatch_test("T.*e q.*k b.*n fo+x j.*s o.*r t.*e l[oa][az].*y d[ao]g.", text_fox4, (char *[]){"The quick brown fooox jumps over the lazy dog.", NULL});
rmatch_test("T.*e q.*k b.*n f+x j.*s o.*r t.*e l[oa][az].*y d[ao]g.", text_fox4, (char *[]){NULL});
printf("Testing replace.\n");
rmatch_tests_replace();
printf("Testing compile.\n");
rmatch_tests_compile();
printf("Testing slash commands.\n");
rmatch_tests_slash();
printf("Testing extract.\n");
rmatch_tests_extract();
printf("Tests succesfully completed!\n");
}
#endif
4 changes: 2 additions & 2 deletions rmatch.so.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* Returns offset if found
*/
extern int matchr(char * expr, char * str){
rmatch_t *result = rmatch(expr,str);
rmatch_t *result = rmatch(expr,&str);
return result->valid ? result->start : -1;
}
/*
Expand All @@ -16,5 +16,5 @@ extern int matchr(char * expr, char * str){
* Returns offset if found
*/
extern int matchb(char * expr, char * str){
return rbmatch(expr,str);
return bmatch(expr,str);
}
14 changes: 14 additions & 0 deletions rmatch2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@


/*
 * Result/state record returned (by pointer) from rmatch2().
 *
 * NOTE(review): the field meanings are not established in this file. The
 * names match the rmatch_t fields used in rmatch.h (valid, start, length,
 * position, binary, size), so they presumably carry the same roles; confirm
 * once rmatch2() has a body.
 */
typedef struct rmatch2_t {
    unsigned valid;
    unsigned start;
    unsigned length;
    unsigned position;

    unsigned binary;
    long size;
} rmatch2_t;

rmatch2_t * rmatch2(char *regexp, char **txt)

0 comments on commit 7921929

Please sign in to comment.