From a13e0de9499b1ec414ca78da84fc6f4e91749d69 Mon Sep 17 00:00:00 2001 From: voidbert <50591320+voidbert@users.noreply.github.com> Date: Tue, 24 Oct 2023 10:58:45 +0100 Subject: [PATCH 1/5] Confirmed global variables can be used --- trabalho-pratico/src/utils/date.c | 2 -- trabalho-pratico/src/utils/date_and_time.c | 2 -- trabalho-pratico/src/utils/daytime.c | 2 -- 3 files changed, 6 deletions(-) diff --git a/trabalho-pratico/src/utils/date.c b/trabalho-pratico/src/utils/date.c index d694cbf..d91cf7c 100644 --- a/trabalho-pratico/src/utils/date.c +++ b/trabalho-pratico/src/utils/date.c @@ -34,8 +34,6 @@ /** * @brief Grammar for parsing dates. * @details Shall not be modified apart from its creation. - * - * TODO - ask professors if we can use global variables */ fixed_n_delimiter_parser_grammar_t *date_grammar = NULL; diff --git a/trabalho-pratico/src/utils/date_and_time.c b/trabalho-pratico/src/utils/date_and_time.c index ab4f250..312e2e7 100644 --- a/trabalho-pratico/src/utils/date_and_time.c +++ b/trabalho-pratico/src/utils/date_and_time.c @@ -35,8 +35,6 @@ /** * @brief Grammar for parsing timed dates. * @details Shall not be modified apart from its creation. - * - * TODO - ask professors if we can use global variables */ fixed_n_delimiter_parser_grammar_t *date_and_time_grammar = NULL; diff --git a/trabalho-pratico/src/utils/daytime.c b/trabalho-pratico/src/utils/daytime.c index 6a7479b..57068f1 100644 --- a/trabalho-pratico/src/utils/daytime.c +++ b/trabalho-pratico/src/utils/daytime.c @@ -34,8 +34,6 @@ /** * @brief Grammar for parsing times. * @details Shall not be modified apart from its creation. 
- * - * TODO - ask professors if we can use global variables */ fixed_n_delimiter_parser_grammar_t *daytime_grammar = NULL; From 2f7738120971d5b1822fb6ac86ee8dc66311b1ea Mon Sep 17 00:00:00 2001 From: voidbert <50591320+voidbert@users.noreply.github.com> Date: Tue, 24 Oct 2023 11:10:39 +0100 Subject: [PATCH 2/5] string_duplicate -> stdlib's strdup --- trabalho-pratico/include/utils/dataset_parser.h | 2 +- .../include/utils/fixed_n_delimiter_parser.h | 2 +- trabalho-pratico/include/utils/string_utils.h | 7 ------- trabalho-pratico/src/test.c | 3 +-- trabalho-pratico/src/utils/date.c | 2 +- trabalho-pratico/src/utils/date_and_time.c | 2 +- trabalho-pratico/src/utils/daytime.c | 2 +- .../src/utils/fixed_n_delimiter_parser.c | 2 +- trabalho-pratico/src/utils/string_utils.c | 12 +----------- 9 files changed, 8 insertions(+), 26 deletions(-) diff --git a/trabalho-pratico/include/utils/dataset_parser.h b/trabalho-pratico/include/utils/dataset_parser.h index ee54bfe..f76dca1 100644 --- a/trabalho-pratico/include/utils/dataset_parser.h +++ b/trabalho-pratico/include/utils/dataset_parser.h @@ -69,7 +69,7 @@ * person_dataset_t *dataset = (person_dataset_t *) user_data; * * // Copy string to another buffer, as it will be destroyed when the next line is parsed - * char *name_copy = string_duplicate(token); + * char *name_copy = strdup(token); * dataset->current_person.name = name_copy; * return 0; * } diff --git a/trabalho-pratico/include/utils/fixed_n_delimiter_parser.h b/trabalho-pratico/include/utils/fixed_n_delimiter_parser.h index 9aacab7..7d7920f 100644 --- a/trabalho-pratico/include/utils/fixed_n_delimiter_parser.h +++ b/trabalho-pratico/include/utils/fixed_n_delimiter_parser.h @@ -43,7 +43,7 @@ * (void) ntoken; * * // Copy string to another buffer - * char *name_copy = string_duplicate(token); // No failure check in this example + * char *name_copy = strdup(token); // No failure check in this example * ((person_t *) user_data)->name = name_copy; * return 0; * } diff 
--git a/trabalho-pratico/include/utils/string_utils.h b/trabalho-pratico/include/utils/string_utils.h index b678426..99c3691 100644 --- a/trabalho-pratico/include/utils/string_utils.h +++ b/trabalho-pratico/include/utils/string_utils.h @@ -83,13 +83,6 @@ #ifndef STRING_UTILS_H #define STRING_UTILS_H -/** - * @brief Allocates a temporary buffer for @p input and copies @p input there. - * @param Input to be duplicated. - * @return A buffer owned by the caller (`NULL` on failure). - */ -char *string_duplicate(const char *input); - /** * @brief Callback method for ::string_tokenize, called for every token read. * diff --git a/trabalho-pratico/src/test.c b/trabalho-pratico/src/test.c index 0c9f102..9a13d57 100644 --- a/trabalho-pratico/src/test.c +++ b/trabalho-pratico/src/test.c @@ -26,7 +26,6 @@ #include "utils/dataset_parser.h" #include "utils/fixed_n_delimiter_parser.h" -#include "utils/string_utils.h" /** @brief Path to the file to be opened for this test. */ #define TEST_FILE "testfile.txt" @@ -50,7 +49,7 @@ int parse_name(void *user_data, char *token, size_t ntoken) { person_dataset_t *dataset = (person_dataset_t *) user_data; /* Copy string to another buffer */ - char *name_copy = string_duplicate(token); + char *name_copy = strdup(token); dataset->current_person.name = name_copy; return 0; } diff --git a/trabalho-pratico/src/utils/date.c b/trabalho-pratico/src/utils/date.c index d91cf7c..9d90388 100644 --- a/trabalho-pratico/src/utils/date.c +++ b/trabalho-pratico/src/utils/date.c @@ -150,7 +150,7 @@ int date_from_string(date_t *output, char *input) { } int date_from_string_const(date_t *output, const char *input) { - char *buffer = string_duplicate(input); + char *buffer = strdup(input); if (!buffer) return 1; diff --git a/trabalho-pratico/src/utils/date_and_time.c b/trabalho-pratico/src/utils/date_and_time.c index 312e2e7..8766a17 100644 --- a/trabalho-pratico/src/utils/date_and_time.c +++ b/trabalho-pratico/src/utils/date_and_time.c @@ -113,7 +113,7 @@ int 
date_and_time_from_string(date_and_time_t *output, char *input) { } int date_and_time_from_string_const(date_and_time_t *output, const char *input) { - char *buffer = string_duplicate(input); + char *buffer = strdup(input); if (!buffer) return 1; diff --git a/trabalho-pratico/src/utils/daytime.c b/trabalho-pratico/src/utils/daytime.c index 57068f1..c4ee33a 100644 --- a/trabalho-pratico/src/utils/daytime.c +++ b/trabalho-pratico/src/utils/daytime.c @@ -128,7 +128,7 @@ int daytime_from_string(daytime_t *output, char *input) { } int daytime_from_string_const(daytime_t *output, const char *input) { - char *buffer = string_duplicate(input); + char *buffer = strdup(input); if (!buffer) return 1; diff --git a/trabalho-pratico/src/utils/fixed_n_delimiter_parser.c b/trabalho-pratico/src/utils/fixed_n_delimiter_parser.c index 1fbeb76..40a6509 100644 --- a/trabalho-pratico/src/utils/fixed_n_delimiter_parser.c +++ b/trabalho-pratico/src/utils/fixed_n_delimiter_parser.c @@ -151,7 +151,7 @@ int fixed_n_delimiter_parser_parse_string(char *in int fixed_n_delimiter_parser_parse_string_const(const char *input, fixed_n_delimiter_parser_grammar_t *grammar, void *user_data) { - char *buffer = string_duplicate(input); + char *buffer = strdup(input); if (!buffer) return FIXED_N_DELIMITER_PARSER_PARSE_STRING_CONST_RET_MALLOC_FAILURE; diff --git a/trabalho-pratico/src/utils/string_utils.c b/trabalho-pratico/src/utils/string_utils.c index 633a934..61ddd87 100644 --- a/trabalho-pratico/src/utils/string_utils.c +++ b/trabalho-pratico/src/utils/string_utils.c @@ -32,16 +32,6 @@ #include "utils/string_utils.h" -char *string_duplicate(const char *input) { - size_t buffer_size = strlen(input) + 1; - char *buffer = malloc(buffer_size); - if (!buffer) - return NULL; - - (void) memcpy(buffer, input, buffer_size); - return buffer; -} - int string_tokenize(char *input, char delimiter, tokenize_iter_callback_t callback, @@ -64,7 +54,7 @@ int string_const_tokenize(const char *input, 
tokenize_iter_callback_t callback, void *user_data) { - char *buffer = string_duplicate(input); + char *buffer = strdup(input); if (!buffer) return STRING_CONST_TOKENIZE_FAILED_MALLOC; From 9a21f4fe26f3a32550240096b4e8baadff8a1111 Mon Sep 17 00:00:00 2001 From: voidbert <50591320+voidbert@users.noreply.github.com> Date: Tue, 24 Oct 2023 11:28:38 +0100 Subject: [PATCH 3/5] Add before_parse callback to dataset parser Will be required for error reporting --- .../include/utils/dataset_parser.h | 36 ++++++++++++++++--- trabalho-pratico/src/test.c | 12 +++++-- trabalho-pratico/src/utils/dataset_parser.c | 27 +++++++++----- 3 files changed, 60 insertions(+), 15 deletions(-) diff --git a/trabalho-pratico/include/utils/dataset_parser.h b/trabalho-pratico/include/utils/dataset_parser.h index f76dca1..e0c8f17 100644 --- a/trabalho-pratico/include/utils/dataset_parser.h +++ b/trabalho-pratico/include/utils/dataset_parser.h @@ -92,6 +92,13 @@ * return 0; * } * + * // Print line before parsing (debug purposes) + * int before_parse_token(void *user_data, char *token) { + * (void) user_data; + * printf("Parsing line: %s\n", token); + * return 0; + * } + * * // Gets called once for line, when each line is done parsing * int add_to_reject_from_database(void *user_data, int retcode) { * person_dataset_t *dataset = (person_dataset_t *) user_data; @@ -127,7 +134,10 @@ * fixed_n_delimiter_parser_grammar_t *token_grammar = * fixed_n_delimiter_parser_grammar_new(';', 3, token_grammar_callbacks); * dataset_parser_grammar_t *grammar = - * dataset_parser_grammar_new('\n', token_grammar, add_to_reject_from_database); + * dataset_parser_grammar_new('\n', + * token_grammar, + * before_parse_token, + * add_to_reject_from_database); * * person_dataset_t dataset = {0}; * int parse_result = dataset_parser_parse(file, grammar, &dataset); @@ -184,6 +194,22 @@ */ typedef struct dataset_parser_grammar_t dataset_parser_grammar_t; +/** + * @brief Callback for each token delimited by the first-order 
delimiter in a dataset parser + * (e.g.: CSV line). + * @details This is called before each token is parsed by ::fixed_n_delimiter_parser_parse_string. + * Its purpose is generally to store the token before it is modified by the parser. + * + * @param user_data Pointer provided to ::dataset_parser_parse, so that this callback can modify + * the program's state. + * @param unparsed Token to be parsed. + * + * @return `0` on success, other value for immediate termination of parsing. It's recommended that + * these values are positive, as negative values have special meanings (see + * ::DATASET_PARSER_PARSE_RET_ALLOCATION_FAILURE). + */ +typedef int (*dataset_parser_token_before_parse_callback)(void *user_data, char *unparsed); + /** * @brief Callback for each token delimited by the first-order delimiter in a dataset parser * (e.g.: CSV line). @@ -208,6 +234,7 @@ typedef int (*dataset_parser_token_callback)(void *user_data, int retcode); * @param token_grammar Grammar for ::fixed_n_delimiter_parser_parse_string, used to parse * each token delimited by @p first_order_delimiter. Ownership of this * value will be taken by this function. + * @param before_parse_callback Callback called before parsing each token. * @param token_callback Callback called after processing each token with @p token_grammar. * * @return `malloc`-allocated ::dataset_parser_grammar_t (or `NULL` on allocation failure). This @@ -218,9 +245,10 @@ typedef int (*dataset_parser_token_callback)(void *user_data, int retcode); * See [the header file's documentation](@ref dataset_parser_examples). 
*/ dataset_parser_grammar_t * - dataset_parser_grammar_new(char first_order_delimiter, - fixed_n_delimiter_parser_grammar_t *token_grammar, - dataset_parser_token_callback token_callback); + dataset_parser_grammar_new(char first_order_delimiter, + fixed_n_delimiter_parser_grammar_t *token_grammar, + dataset_parser_token_before_parse_callback before_parse_callback, + dataset_parser_token_callback token_callback); /** * @brief Frees memory allocated by ::dataset_parser_grammar_new. diff --git a/trabalho-pratico/src/test.c b/trabalho-pratico/src/test.c index 9a13d57..0a3eda0 100644 --- a/trabalho-pratico/src/test.c +++ b/trabalho-pratico/src/test.c @@ -71,6 +71,12 @@ int parse_int(void *user_data, char *token, size_t ntoken) { return 0; } +int before_parse_token(void *user_data, char *token) { + (void) user_data; + printf("Parsing line: %s\n", token); + return 0; +} + int add_to_reject_from_database(void *user_data, int retcode) { person_dataset_t *dataset = (person_dataset_t *) user_data; @@ -104,8 +110,10 @@ int main(void) { parse_int}; fixed_n_delimiter_parser_grammar_t *token_grammar = fixed_n_delimiter_parser_grammar_new(';', 3, token_grammar_callbacks); - dataset_parser_grammar_t *grammar = - dataset_parser_grammar_new('\n', token_grammar, add_to_reject_from_database); + dataset_parser_grammar_t *grammar = dataset_parser_grammar_new('\n', + token_grammar, + before_parse_token, + add_to_reject_from_database); person_dataset_t dataset = {0}; int parse_result = dataset_parser_parse(file, grammar, &dataset); diff --git a/trabalho-pratico/src/utils/dataset_parser.c b/trabalho-pratico/src/utils/dataset_parser.c index 4c6ac9e..ea58bec 100644 --- a/trabalho-pratico/src/utils/dataset_parser.c +++ b/trabalho-pratico/src/utils/dataset_parser.c @@ -34,15 +34,18 @@ * * @var dataset_parser_grammar_t::token_grammar * @brief Grammar to use to parse single tokens with ::fixed_n_delimiter_parser_parse_string. 
+ * @var dataset_parser_grammar_t::before_parse_callback + * @brief Callback called before parsing each token. * @var dataset_parser_grammar_t::token_callback * @brief Callback called after processing each token. * @var dataset_parser_grammar_t::delimiter * @brief Separator between first-order tokens (e.g.: ``'\n'`` for CSV files). */ struct dataset_parser_grammar_t { - fixed_n_delimiter_parser_grammar_t *token_grammar; - dataset_parser_token_callback token_callback; - char delimiter; + fixed_n_delimiter_parser_grammar_t *token_grammar; + dataset_parser_token_before_parse_callback before_parse_callback; + dataset_parser_token_callback token_callback; + char delimiter; }; /** @@ -65,18 +68,20 @@ typedef struct { } dataset_parser_t; dataset_parser_grammar_t * - dataset_parser_grammar_new(char first_order_delimiter, - fixed_n_delimiter_parser_grammar_t *token_grammar, - dataset_parser_token_callback token_callback) { + dataset_parser_grammar_new(char first_order_delimiter, + fixed_n_delimiter_parser_grammar_t *token_grammar, + dataset_parser_token_before_parse_callback before_parse_callback, + dataset_parser_token_callback token_callback) { dataset_parser_grammar_t *grammar = malloc(sizeof(struct dataset_parser_grammar_t)); if (!grammar) { return NULL; } - grammar->delimiter = first_order_delimiter; - grammar->token_grammar = token_grammar; - grammar->token_callback = token_callback; + grammar->delimiter = first_order_delimiter; + grammar->token_grammar = token_grammar; + grammar->before_parse_callback = before_parse_callback; + grammar->token_callback = token_callback; return grammar; } @@ -102,6 +107,10 @@ int __parse_stream_iter(void *user_data, char *token) { return 0; } + int before_parse_ret = parser->grammar->before_parse_callback(parser->user_data, token); + if (before_parse_ret) + return before_parse_ret; + int parser_ret = fixed_n_delimiter_parser_parse_string(token, parser->grammar->token_grammar, parser->user_data); From 
93d80a9c3b7402612001c1c1eac2bc4504bf66b5 Mon Sep 17 00:00:00 2001 From: voidbert <50591320+voidbert@users.noreply.github.com> Date: Tue, 24 Oct 2023 11:41:05 +0100 Subject: [PATCH 4/5] Finish memory optimization part of report --- trabalho-pratico/reports/relatorio-fase1.tex | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/trabalho-pratico/reports/relatorio-fase1.tex b/trabalho-pratico/reports/relatorio-fase1.tex index 0af19a0..436d2dd 100755 --- a/trabalho-pratico/reports/relatorio-fase1.tex +++ b/trabalho-pratico/reports/relatorio-fase1.tex @@ -40,9 +40,14 @@ \section{Otimização do uso de memória} quantidade de memória utilizada, de modo não sermos futuramente obrigados a reescrever partes significativas do nosso código. -\subsection{Observação do \emph{dataset}} - -TODO - como usámos tipos que não strings nas entidades por observação do \emph{dataset}. +\subsection{Observação do \emph{dataset} e das \emph{queries}} + +Pudemos observar que alguns campos do \emph{dataset} nunca precisavam de estar presentes no +\emph{output} de nenhuma \emph{query} (ex: o \emph{email} de um utilizador), pelo que era escusado +o seu armazenamento na base de dados, sendo apenas necessária a sua validação durante o +\emph{parsing}. Ademais, por observação dos \emph{datasets} em si, pudemos concluir que é possível +armazenar certos campos como estruturas de dados mais compactas (ex: o identificador de um voo +pode ser armazenado como um inteiro, em vez de uma \emph{string}). 
\subsection{Tipos opacos} From e4ee34efe62f618cbdd14127d518c609b5d55dd2 Mon Sep 17 00:00:00 2001 From: voidbert <50591320+voidbert@users.noreply.github.com> Date: Tue, 24 Oct 2023 14:24:07 +0100 Subject: [PATCH 5/5] Tokenizer now restores string to previous state --- trabalho-pratico/include/utils/dataset_parser.h | 1 - trabalho-pratico/include/utils/date.h | 2 +- trabalho-pratico/include/utils/date_and_time.h | 2 +- trabalho-pratico/include/utils/daytime.h | 2 +- trabalho-pratico/include/utils/fixed_n_delimiter_parser.h | 3 ++- trabalho-pratico/include/utils/string_utils.h | 3 ++- trabalho-pratico/src/utils/string_utils.c | 3 +++ 7 files changed, 10 insertions(+), 6 deletions(-) diff --git a/trabalho-pratico/include/utils/dataset_parser.h b/trabalho-pratico/include/utils/dataset_parser.h index e0c8f17..e35e058 100644 --- a/trabalho-pratico/include/utils/dataset_parser.h +++ b/trabalho-pratico/include/utils/dataset_parser.h @@ -198,7 +198,6 @@ typedef struct dataset_parser_grammar_t dataset_parser_grammar_t; * @brief Callback for each token delimited by the first-order delimiter in a dataset parser * (e.g.: CSV line). * @details This is called before each token is parsed by ::fixed_n_delimiter_parser_parse_string. - * Its purpose is generally to store the token before it is modified by the parser. * * @param user_data Pointer provided to ::dataset_parser_parse, so that this callback can modify * the program's state. diff --git a/trabalho-pratico/include/utils/date.h b/trabalho-pratico/include/utils/date.h index 6ca3bd4..1a2cbbe 100644 --- a/trabalho-pratico/include/utils/date.h +++ b/trabalho-pratico/include/utils/date.h @@ -100,7 +100,7 @@ int date_from_values(date_t *output, uint16_t year, uint8_t month, uint8_t day); * @brief Parses a **MODIFIABLE** string containing a date. * * @param output Where the parsed date is placed. Won't be modified on failure. - * @param input String to parse, that will be modified. Must be in the format `"YYYY/MM/DD"`. 
+ * @param input String to parse, that won't be modified. Must be in the format `"YYYY/MM/DD"`. * * @retval 0 Parsing success. * @retval 1 Parsing failure. diff --git a/trabalho-pratico/include/utils/date_and_time.h b/trabalho-pratico/include/utils/date_and_time.h index f25517b..40e0b28 100644 --- a/trabalho-pratico/include/utils/date_and_time.h +++ b/trabalho-pratico/include/utils/date_and_time.h @@ -75,7 +75,7 @@ void date_and_time_from_values(date_and_time_t *output, date_t date, daytime_t t * @brief Parses a **MODIFIABLE** string containing a date and a time. * * @param output Where the parsed timed date is placed. Won't be modified on failure. - * @param input String to parse, that will be modified. Must be in the format + * @param input String to parse, that won't be modified. Must be in the format * `"YYYY/MM/DD HH:MM:SS"`. * * @retval 0 Parsing success. diff --git a/trabalho-pratico/include/utils/daytime.h b/trabalho-pratico/include/utils/daytime.h index 284319c..c8ff28b 100644 --- a/trabalho-pratico/include/utils/daytime.h +++ b/trabalho-pratico/include/utils/daytime.h @@ -89,7 +89,7 @@ int daytime_from_values(daytime_t *output, uint8_t hours, uint8_t minutes, uint8 * @brief Parses a **MODIFIABLE** string containing a time. * * @param output Where the parsed time is placed. Won't be modified on failure. - * @param input String to parse, that will be modified. Must be in the format `"HH:MM:SS"`. + * @param input String to parse, that won't be modified. Must be in the format `"HH:MM:SS"`. * * @retval 0 Parsing success. * @retval 1 Parsing failure. 
diff --git a/trabalho-pratico/include/utils/fixed_n_delimiter_parser.h b/trabalho-pratico/include/utils/fixed_n_delimiter_parser.h index 7d7920f..631cd2e 100644 --- a/trabalho-pratico/include/utils/fixed_n_delimiter_parser.h +++ b/trabalho-pratico/include/utils/fixed_n_delimiter_parser.h @@ -201,7 +201,8 @@ void fixed_n_delimiter_parser_grammar_free(fixed_n_delimiter_parser_grammar_t *g /** * @brief Parses a **MODIFIABLE** string using a parser defined by @p grammar. * - * @param input String to parse, that that will be modified for this function to work. + * @param input String to parse, that will be modified for this function to work, but then + * restored to its original form. * @param grammar Grammar that defines the parser to be used. * @param user_data Pointer passed to every callback in @p grammar, so that they can edit the * program's state. diff --git a/trabalho-pratico/include/utils/string_utils.h b/trabalho-pratico/include/utils/string_utils.h index 99c3691..c72009d 100644 --- a/trabalho-pratico/include/utils/string_utils.h +++ b/trabalho-pratico/include/utils/string_utils.h @@ -99,7 +99,8 @@ typedef int (*tokenize_iter_callback_t)(void *user_data, char *token); /** * @brief Splits a **MODIFIABLE** string into tokens, separated by @p delimiter. * - * @param input String to tokenize, that that will be modified for this function to work. + * @param input String to tokenize, that will be modified for this function to work, but + * later restored to its original form. * @param delimiter Character to separate tokens. It won't be part of those tokens. * @param callback Function called for every token read. * @param user_data Pointer passed to every call of @p callback, so that it can edit program state. 
diff --git a/trabalho-pratico/src/utils/string_utils.c b/trabalho-pratico/src/utils/string_utils.c index 61ddd87..3687b9f 100644 --- a/trabalho-pratico/src/utils/string_utils.c +++ b/trabalho-pratico/src/utils/string_utils.c @@ -44,6 +44,9 @@ int string_tokenize(char *input, int cb_result = callback(user_data, token); if (cb_result) return cb_result; + + if (input) + *(input - 1) = delimiter; /* Restore string */ } return 0;