-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtext_processing.c
109 lines (102 loc) · 2.63 KB
/
text_processing.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#include <stdlib.h>
#include "utils.h"
#include "words.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
/*
 * Read the file called `name` and store a normalised, NUL-terminated copy of
 * its contents in a freshly allocated buffer returned through `*text`.
 *
 * Normalisation:
 *   - every '\n' is turned into a single space;
 *   - leading spaces are dropped;
 *   - runs of spaces are collapsed to one space;
 *   - a trailing space is removed.
 * Other characters (tabs, '\r', ...) are copied through unchanged.
 *
 * text: out parameter; receives the heap buffer, or NULL when the file holds
 *       nothing but spaces/newlines (or is empty). The caller owns the buffer
 *       and releases it with free().
 * name: path of the file to read (opened in text mode).
 *
 * Failure to open the file or to allocate memory prints a diagnostic to
 * stderr and calls abort(). These checks are explicit rather than assert()
 * based so they stay active when the file is compiled with NDEBUG.
 */
void
get_input_from_file(char **text, char *name)
{
    FILE *input_file = fopen(name, "r");
    if (input_file == NULL)
    {
        perror(name);
        abort();
    }

    size_t capacity = 16;
    size_t length = 0;
    char *buffer = malloc(capacity);
    if (buffer == NULL)
    {
        fclose(input_file);
        fprintf(stderr, "malloc failed!\n");
        abort();
    }

    int ch;
    while ((ch = fgetc(input_file)) != EOF)
    {
        if (ch == '\n')
        {
            ch = ' ';
        }
        /* Skip leading spaces and any space that follows a stored space. */
        if (ch == ' ' && (length == 0 || buffer[length - 1] == ' '))
        {
            continue;
        }
        /* Always keep one spare byte so the terminator fits after the loop. */
        if (length + 1 >= capacity)
        {
            size_t new_capacity = capacity * 2;
            char *grown = realloc(buffer, new_capacity);
            if (grown == NULL)
            {
                free(buffer);
                fclose(input_file);
                fprintf(stderr, "realloc failed!\n");
                abort();
            }
            buffer = grown;
            capacity = new_capacity;
        }
        buffer[length++] = (char)ch;
    }
    fclose(input_file);

    /* Runs were collapsed above, so at most one trailing space remains. */
    if (length > 0 && buffer[length - 1] == ' ')
    {
        length--;
    }
    if (length == 0)
    {
        free(buffer);
        *text = NULL;
        return;
    }
    buffer[length] = '\0';
    *text = buffer;
}
/*
 * Hand back a heap-allocated copy of the built-in pangram through `*text`.
 * The caller owns the returned buffer and must free() it.
 * Allocation failure trips the assertion below.
 */
void
get_default_text(char **text)
{
    static const char def_text[] = "The quick brown fox jumps over the lazy dog.";
    /* sizeof on the array already counts the terminating NUL. */
    const size_t bytes = sizeof def_text;

    char *ptr = malloc(bytes);
    assert(ptr != 0 && "malloc failed!\n");

    memcpy(ptr, def_text, bytes);
    *text = ptr;
}
/*
 * Build a string of `num` words picked at random from freq_words_100,
 * separated by single spaces, and return it through `*text`.
 *
 * text: out parameter; always receives a heap-allocated, NUL-terminated
 *       string owned by the caller (release with free()). For num == 0 the
 *       result is an empty string.
 * num:  number of words to emit.
 *
 * The generator is seeded from the clock on the first call only, and words
 * are drawn with rand() so that the srand() seed actually applies.
 * The buffer starts at 16 bytes per word and grows if the chosen words need
 * more, so exactly `num` words are always produced.
 * Allocation failure prints a diagnostic to stderr and calls abort().
 *
 * NOTE(review): the index range of 1000 is kept from the original code; the
 * table name suggests it may hold fewer entries -- confirm against the
 * definition of freq_words_100.
 */
void
get_random_text(char **text, size_t num)
{
    static int seeded = 0;
    if (!seeded)
    {
        srand((unsigned)time(NULL));
        seeded = 1;
    }

    /* Guard the size computation below against wrapping around. */
    if (num > ((size_t)-1 - 1) / 16)
    {
        fprintf(stderr, "malloc failed!\n");
        abort();
    }

    size_t capacity = 16 * num + 1; /* initial estimate: 16 bytes per word */
    size_t length = 0;
    char *buffer = malloc(capacity);
    if (buffer == NULL)
    {
        fprintf(stderr, "malloc failed!\n");
        abort();
    }

    for (size_t i = 0; i < num; i++)
    {
        const char *sample = freq_words_100[rand() % 1000];
        size_t sample_len = strlen(sample);

        /* Room for an optional separator, the word and the terminator. */
        size_t needed = length + sample_len + 2;
        if (needed > capacity)
        {
            size_t new_capacity = capacity;
            while (new_capacity < needed)
            {
                new_capacity *= 2;
            }
            char *grown = realloc(buffer, new_capacity);
            if (grown == NULL)
            {
                free(buffer);
                fprintf(stderr, "realloc failed!\n");
                abort();
            }
            buffer = grown;
            capacity = new_capacity;
        }

        if (i > 0)
        {
            buffer[length++] = ' ';
        }
        memcpy(buffer + length, sample, sample_len);
        length += sample_len;
    }

    buffer[length] = '\0';
    *text = buffer;
}
/*
 * Release a text buffer obtained from one of the get_*_text functions.
 * Passing NULL is allowed and does nothing, since free(NULL) is a no-op.
 */
void
free_text(char *text)
{
    free(text);
}