-
Notifications
You must be signed in to change notification settings - Fork 340
/
Copy pathrun.c
194 lines (166 loc) · 5.04 KB
/
run.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
/**
* @file
* @brief This file uses Google style formatting.
*/
/**
* This program implements the Levenshtein distance algorithm and provides
* functionality to benchmark it with the following features:
* - Reads words from an input file
* - Calculates Levenshtein distances between all unique pairs
* - Returns sum of all distances as final result
* - Provides benchmark statistics in CSV format
*
* The program takes two command line arguments:
* 1. run_ms: How long to run the benchmark in milliseconds
* 2. input_file: Path to file containing space-separated words
*
* Output format: mean_ms,std_dev_ms,min_ms,max_ms,runs,result
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "benchmark.h"
// Can either define your own min function
// or use a language / standard library function
int min(int a, int b, int c) {
int min = a;
if (b < min) min = b;
if (c < min) min = c;
return min;
}
/**
* Calculates the Levenshtein distance between two strings using an optimized
* version of Wagner-Fischer algorithm that uses O(min(m,n)) space.
*
* @param s1 The first string to compare
* @param s2 The second string to compare
* @return The Levenshtein distance between s1 and s2
*/
int levenshtein_distance(const char* s1, const char* s2) {
// Get lengths of both strings
int mt = strlen(s1);
int nt = strlen(s2);
// Assign shorter one to str1, longer one to str2
const char* str1 = mt <= nt ? s1 : s2;
const char* str2 = mt <= nt ? s2 : s1;
// store the lengths of shorter in m, longer in n
int m = str1 == s1 ? mt : nt;
int n = str1 == s1 ? nt : mt;
// Create two rows, previous and current
int prev[m + 1];
int curr[m + 1];
// initialize the previous row
for (int i = 0; i <= m; i++) {
prev[i] = i;
}
// Iterate and compute distance
for (int i = 1; i <= n; i++) {
curr[0] = i;
for (int j = 1; j <= m; j++) {
int cost = (str1[j - 1] == str2[i - 1]) ? 0 : 1;
curr[j] = min(prev[j] + 1, // Deletion
curr[j - 1] + 1, // Insertion
prev[j - 1] + cost // Substitution
);
}
for (int j = 0; j <= m; j++) {
prev[j] = curr[j];
}
}
// Return final distance, stored in prev[m]
return prev[m];
}
static char** read_words(const char* filename, int* word_count) {
// First read entire file content
FILE* file = fopen(filename, "r");
if (!file) {
fprintf(stderr, "Could not open file: %s\n", filename);
exit(1);
}
// Get file size
fseek(file, 0, SEEK_END);
long file_size = ftell(file);
fseek(file, 0, SEEK_SET);
// Read entire file into buffer
char* content = malloc(file_size + 1);
fread(content, 1, file_size, file);
content[file_size] = '\0';
fclose(file);
// Count words (space separated)
int capacity = 100;
char** words = malloc(capacity * sizeof(char*));
*word_count = 0;
// Split on lines
char* word = strtok(content, "\n");
while (word != NULL) {
if (*word_count == capacity) {
capacity *= 2;
words = realloc(words, capacity * sizeof(char*));
}
words[*word_count] = strdup(word);
(*word_count)++;
word = strtok(NULL, "\n");
}
free(content);
return words;
}
typedef struct {
long* distances;
int count;
} distances_result_t;
static distances_result_t* calculate_distances(char** words, int word_count) {
distances_result_t* result = malloc(sizeof(distances_result_t));
result->count = (word_count * (word_count - 1)) / 2;
result->distances = malloc(result->count * sizeof(long));
int idx = 0;
for (int i = 0; i < word_count; i++) {
for (int j = i + 1; j < word_count; j++) {
result->distances[idx++] = levenshtein_distance(words[i], words[j]);
}
}
return result;
}
typedef struct {
char** words;
int count;
} word_data_t;
// The work function that benchmark will time
static benchmark_result_t work(void* data) {
word_data_t* word_data = (word_data_t*)data;
distances_result_t* distances =
calculate_distances(word_data->words, word_data->count);
benchmark_result_t result = {.value.ptr = distances};
return result;
}
int main(int argc, char* argv[]) {
if (argc != 4) {
fprintf(stderr, "Usage: %s <run_ms> <warmup_ms> <input_file>\n", argv[0]);
return 1;
}
int run_ms = atoi(argv[1]);
int warmup_ms = atoi(argv[2]);
int word_count;
char** words = read_words(argv[3], &word_count);
word_data_t data = {words, word_count};
benchmark_run(work, &data, warmup_ms);
benchmark_stats_t stats = benchmark_run(work, &data, run_ms);
// Sum the distances outside the benchmarked function
distances_result_t* distances =
(distances_result_t*)stats.last_result.value.ptr;
long sum = 0;
for (int i = 0; i < distances->count; i++) {
sum += distances->distances[i];
}
stats.last_result.value.number = sum;
char buffer[1024];
benchmark_format_results(stats, buffer, sizeof(buffer));
printf("%s\n", buffer);
// Clean up everything
free(distances->distances);
free(distances);
for (int i = 0; i < word_count; i++) {
free(words[i]);
}
free(words);
return 0;
}