diff --git a/_parts/part1.md b/_parts/part1.md index 2535dd6..1d9059c 100644 --- a/_parts/part1.md +++ b/_parts/part1.md @@ -83,12 +83,11 @@ int main(int argc, char* argv[]) { We'll define `InputBuffer` as a small wrapper around the state we need to store to interact with [getline()](http://man7.org/linux/man-pages/man3/getline.3.html). (More on that in a minute) ```c -struct InputBuffer_t { +typedef struct { char* buffer; size_t buffer_length; ssize_t input_length; -}; -typedef struct InputBuffer_t InputBuffer; +} InputBuffer; InputBuffer* new_input_buffer() { InputBuffer* input_buffer = malloc(sizeof(InputBuffer)); @@ -178,12 +177,11 @@ Alright, we've got a working REPL. In the next part, we'll start developing our #include #include -struct InputBuffer_t { +typedef struct { char* buffer; size_t buffer_length; ssize_t input_length; -}; -typedef struct InputBuffer_t InputBuffer; +} InputBuffer; InputBuffer* new_input_buffer() { InputBuffer* input_buffer = malloc(sizeof(InputBuffer)); diff --git a/_parts/part2.md b/_parts/part2.md index 225120a..4b16b7c 100644 --- a/_parts/part2.md +++ b/_parts/part2.md @@ -61,14 +61,12 @@ Lastly, we pass the prepared statement to `execute_statement`. This function wil Notice that two of our new functions return enums indicating success or failure: ```c -enum MetaCommandResult_t { +typedef enum { META_COMMAND_SUCCESS, META_COMMAND_UNRECOGNIZED_COMMAND -}; -typedef enum MetaCommandResult_t MetaCommandResult; +} MetaCommandResult; -enum PrepareResult_t { PREPARE_SUCCESS, PREPARE_UNRECOGNIZED_STATEMENT }; -typedef enum PrepareResult_t PrepareResult; +typedef enum { PREPARE_SUCCESS, PREPARE_UNRECOGNIZED_STATEMENT } PrepareResult; ``` "Unrecognized statement"? That seems a bit like an exception. But [exceptions are bad](https://www.youtube.com/watch?v=EVhCUSgNbzo) (and C doesn't even support them), so I'm using enum result codes wherever practical. 
The C compiler will complain if my switch statement doesn't handle a member of the enum, so we can feel a little more confident we handle every result of a function. Expect more result codes to be added in the future. @@ -88,13 +86,11 @@ MetaCommandResult do_meta_command(InputBuffer* input_buffer) { Our "prepared statement" right now just contains an enum with two possible values. It will contain more data as we allow parameters in statements: ```c -enum StatementType_t { STATEMENT_INSERT, STATEMENT_SELECT }; -typedef enum StatementType_t StatementType; +typedef enum { STATEMENT_INSERT, STATEMENT_SELECT } StatementType; -struct Statement_t { +typedef struct { StatementType type; -}; -typedef struct Statement_t Statement; +} Statement; ``` `prepare_statement` (our "SQL Compiler") does not understand SQL right now. In fact, it only understands two words: @@ -153,25 +149,20 @@ The skeleton of our database is taking shape... wouldn't it be nice if it stored ```diff @@ -10,6 +10,23 @@ struct InputBuffer_t { - }; - typedef struct InputBuffer_t InputBuffer; + } InputBuffer; -+enum MetaCommandResult_t { ++typedef enum { + META_COMMAND_SUCCESS, + META_COMMAND_UNRECOGNIZED_COMMAND -+}; -+typedef enum MetaCommandResult_t MetaCommandResult; ++} MetaCommandResult; + -+enum PrepareResult_t { PREPARE_SUCCESS, PREPARE_UNRECOGNIZED_STATEMENT }; -+typedef enum PrepareResult_t PrepareResult; ++typedef enum { PREPARE_SUCCESS, PREPARE_UNRECOGNIZED_STATEMENT } PrepareResult; + -+enum StatementType_t { STATEMENT_INSERT, STATEMENT_SELECT }; -+typedef enum StatementType_t StatementType; ++typedef enum { STATEMENT_INSERT, STATEMENT_SELECT } StatementType; + -+struct Statement_t { ++typedef struct { + StatementType type; -+}; -+typedef struct Statement_t Statement; ++} Statement; + InputBuffer* new_input_buffer() { InputBuffer* input_buffer = malloc(sizeof(InputBuffer)); diff --git a/_parts/part3.md b/_parts/part3.md index 205c4c7..cfb0d1e 100644 --- a/_parts/part3.md +++ b/_parts/part3.md 
@@ -46,18 +46,16 @@ We store those parsed arguments into a new `Row` data structure inside the state ```diff +#define COLUMN_USERNAME_SIZE 32 +#define COLUMN_EMAIL_SIZE 255 -+struct Row_t { ++typedef struct { + uint32_t id; + char username[COLUMN_USERNAME_SIZE]; + char email[COLUMN_EMAIL_SIZE]; -+}; -+typedef struct Row_t Row; ++} Row; + - struct Statement_t { + typedef struct { StatementType type; + Row row_to_insert; // only used by insert statement - }; - typedef struct Statement_t Statement; + } Statement; ``` Now we need to copy that data into some data structure representing the table. SQLite uses a B-tree for fast lookups, inserts and deletes. We'll start with something simpler. Like a B-tree, it will group rows into pages, but instead of arranging those pages as a tree it will arrange them as an array. @@ -114,11 +112,10 @@ Next, a `Table` structure that points to pages of rows and keeps track of how ma +const uint32_t ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE; +const uint32_t TABLE_MAX_ROWS = ROWS_PER_PAGE * TABLE_MAX_PAGES; + -+struct Table_t { ++typedef struct { + uint32_t num_rows; + void* pages[TABLE_MAX_PAGES]; -+}; -+typedef struct Table_t Table; ++} Table; ``` I'm making our page size 4 kilobytes because it's the same size as a page used in the virtual memory systems of most computer architectures. This means one page in our database corresponds to one page used by the operating system. The operating system will move pages in and out of memory as whole units instead of breaking them up. @@ -263,45 +260,38 @@ We'll address those issues in the next part. 
For now, here's the complete diff f #include +#include - struct InputBuffer_t { + typedef struct { char* buffer; -@@ -10,6 +11,105 @@ struct InputBuffer_t { - }; - typedef struct InputBuffer_t InputBuffer; +@@ -10,6 +11,105 @@ typedef struct { + } InputBuffer; -+enum ExecuteResult_t { EXECUTE_SUCCESS, EXECUTE_TABLE_FULL }; -+typedef enum ExecuteResult_t ExecuteResult; ++typedef enum { EXECUTE_SUCCESS, EXECUTE_TABLE_FULL } ExecuteResult; + -+enum MetaCommandResult_t { ++typedef enum { + META_COMMAND_SUCCESS, + META_COMMAND_UNRECOGNIZED_COMMAND -+}; -+typedef enum MetaCommandResult_t MetaCommandResult; ++} MetaCommandResult; + -+enum PrepareResult_t { ++typedef enum { + PREPARE_SUCCESS, + PREPARE_SYNTAX_ERROR, + PREPARE_UNRECOGNIZED_STATEMENT -+ }; -+typedef enum PrepareResult_t PrepareResult; ++ } PrepareResult; + -+enum StatementType_t { STATEMENT_INSERT, STATEMENT_SELECT }; -+typedef enum StatementType_t StatementType; ++typedef enum { STATEMENT_INSERT, STATEMENT_SELECT } StatementType; + +#define COLUMN_USERNAME_SIZE 32 +#define COLUMN_EMAIL_SIZE 255 -+struct Row_t { ++typedef struct { + uint32_t id; + char username[COLUMN_USERNAME_SIZE]; + char email[COLUMN_EMAIL_SIZE]; -+}; -+typedef struct Row_t Row; ++} Row; + -+struct Statement_t { ++typedef struct { + StatementType type; + Row row_to_insert; //only used by insert statement -+}; -+typedef struct Statement_t Statement; ++} Statement; + +#define size_of_attribute(Struct, Attribute) sizeof(((Struct*)0)->Attribute) + @@ -318,11 +308,10 @@ We'll address those issues in the next part. 
For now, here's the complete diff f +const uint32_t ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE; +const uint32_t TABLE_MAX_ROWS = ROWS_PER_PAGE * TABLE_MAX_PAGES; + -+struct Table_t { ++typedef struct { + uint32_t num_rows; + void* pages[TABLE_MAX_PAGES]; -+}; -+typedef struct Table_t Table; ++} Table; + +void print_row(Row* row) { + printf("(%d, %s, %s)\n", row->id, row->username, row->email); diff --git a/_parts/part4.md b/_parts/part4.md index 1227103..00a462d 100644 --- a/_parts/part4.md +++ b/_parts/part4.md @@ -121,14 +121,13 @@ db > What's going on? If you take a look at our definition of a Row, we allocate exactly 32 bytes for username and exactly 255 bytes for email. But [C strings](http://www.cprogramming.com/tutorial/c/lesson9.html) are supposed to end with a null character, which we didn't allocate space for. The solution is to allocate one additional byte: ```diff const uint32_t COLUMN_EMAIL_SIZE = 255; - struct Row_t { + typedef struct { uint32_t id; - char username[COLUMN_USERNAME_SIZE]; - char email[COLUMN_EMAIL_SIZE]; + char username[COLUMN_USERNAME_SIZE + 1]; + char email[COLUMN_EMAIL_SIZE + 1]; - }; - typedef struct Row_t Row; + } Row; ``` And indeed that fixes it: @@ -305,7 +304,7 @@ It's gonna be great. 
Here's the complete diff for this part: ```diff -@@ -22,6 +22,8 @@ typedef enum MetaCommandResult_t MetaCommandResult; +@@ -22,6 +22,8 @@ enum PrepareResult_t { PREPARE_SUCCESS, @@ -314,16 +313,15 @@ Here's the complete diff for this part: PREPARE_SYNTAX_ERROR, PREPARE_UNRECOGNIZED_STATEMENT }; -@@ -34,8 +36,8 @@ typedef enum StatementType_t StatementType; +@@ -34,8 +36,8 @@ #define COLUMN_EMAIL_SIZE 255 - struct Row_t { + typedef struct { uint32_t id; - char username[COLUMN_USERNAME_SIZE]; - char email[COLUMN_EMAIL_SIZE]; + char username[COLUMN_USERNAME_SIZE + 1]; + char email[COLUMN_EMAIL_SIZE + 1]; - }; - typedef struct Row_t Row; + } Row; @@ -150,18 +152,40 @@ MetaCommandResult do_meta_command(InputBuffer* input_buffer, Table *table) { } diff --git a/_parts/part5.md b/_parts/part5.md index 8603f84..497f3d0 100644 --- a/_parts/part5.md +++ b/_parts/part5.md @@ -40,18 +40,17 @@ To make this easier, we're going to make an abstraction called the pager. We ask The Pager accesses the page cache and the file. The Table object makes requests for pages through the pager: ```diff -+struct Pager_t { ++typedef struct { + int file_descriptor; + uint32_t file_length; + void* pages[TABLE_MAX_PAGES]; -+}; -+typedef struct Pager_t Pager; ++} Pager; + - struct Table_t { + typedef struct { - void* pages[TABLE_MAX_PAGES]; + Pager* pager; uint32_t num_rows; - }; + } Table; ``` I'm renaming `new_table()` to `db_open()` because it now has the effect of opening a connection to the database. By opening a connection, I mean: @@ -336,19 +335,17 @@ Until then! 
const uint32_t ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE; const uint32_t TABLE_MAX_ROWS = ROWS_PER_PAGE * TABLE_MAX_PAGES; -+struct Pager_t { ++typedef struct { + int file_descriptor; + uint32_t file_length; + void* pages[TABLE_MAX_PAGES]; -+}; -+typedef struct Pager_t Pager; ++} Pager; + - struct Table_t { + typedef struct { uint32_t num_rows; - void* pages[TABLE_MAX_PAGES]; + Pager* pager; - }; - typedef struct Table_t Table; + } Table; @@ -84,32 +94,81 @@ void deserialize_row(void *source, Row* destination) { memcpy(&(destination->email), source + EMAIL_OFFSET, EMAIL_SIZE); diff --git a/_parts/part6.md b/_parts/part6.md index 2075761..ee3da27 100644 --- a/_parts/part6.md +++ b/_parts/part6.md @@ -21,12 +21,11 @@ Those are the behaviors we're going to implement now. Later, we will also want t Without further ado, here's the `Cursor` type: ```diff -+struct Cursor_t { ++typedef struct { + Table* table; + uint32_t row_num; + bool end_of_table; // Indicates a position one past the last element -+}; -+typedef struct Cursor_t Cursor; ++} Cursor; ``` Given our current table data structure, all you need to identify a location in a table is the row number. @@ -124,16 +123,14 @@ Alright, that's it! Like I said, this was a shorter refactor that should help us Here's the complete diff to this part: ```diff -@@ -78,6 +78,13 @@ struct Table_t { - }; - typedef struct Table_t Table; +@@ -78,6 +78,13 @@ typedef struct { + } Table; -+struct Cursor_t { ++typedef struct { + Table* table; + uint32_t row_num; + bool end_of_table; // Indicates a position one past the last element -+}; -+typedef struct Cursor_t Cursor; ++} Cursor; + void print_row(Row* row) { printf("(%d, %s, %s)\n", row->id, row->username, row->email); diff --git a/_parts/part8.md b/_parts/part8.md index 1690462..1228a01 100644 --- a/_parts/part8.md +++ b/_parts/part8.md @@ -26,8 +26,7 @@ Instead, we're going with a tree structure. Each node in the tree can contain a Leaf nodes and internal nodes have different layouts. 
Let's make an enum to keep track of node type: ```diff -+enum NodeType_t { NODE_INTERNAL, NODE_LEAF }; -+typedef enum NodeType_t NodeType; ++typedef enum { NODE_INTERNAL, NODE_LEAF } NodeType; ``` Each node will correspond to one page. Internal nodes will point to their children by storing the page number that stores the child. The btree asks the pager for a particular page number and gets back a pointer into the page cache. Pages are stored in the database file one after the other in order of page number. @@ -175,20 +174,18 @@ Now it makes more sense to store the number of pages in our database rather than -const uint32_t ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE; -const uint32_t TABLE_MAX_ROWS = ROWS_PER_PAGE * TABLE_MAX_PAGES; - struct Pager_t { + typedef struct { int file_descriptor; uint32_t file_length; + uint32_t num_pages; void* pages[TABLE_MAX_PAGES]; - }; - typedef struct Pager_t Pager; + } Pager; - struct Table_t { + typedef struct { Pager* pager; - uint32_t num_rows; + uint32_t root_page_num; - }; - typedef struct Table_t Table; + } Table; ``` ```diff @@ -226,14 +223,13 @@ Now it makes more sense to store the number of pages in our database rather than A cursor represents a position in the table. When our table was a simple array of rows, we could access a row given just the row number. Now that it's a tree, we identify a position by the page number of the node, and the cell number within that node. 
```diff - struct Cursor_t { + typedef struct { Table* table; - uint32_t row_num; + uint32_t page_num; + uint32_t cell_num; bool end_of_table; // Indicates a position one past the last element - }; - typedef struct Cursor_t Cursor; + } Cursor; ``` ```diff @@ -510,32 +506,28 @@ Next time, we'll implement finding a record by primary key, and start storing ro -const uint32_t ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE; -const uint32_t TABLE_MAX_ROWS = ROWS_PER_PAGE * TABLE_MAX_PAGES; - struct Pager_t { + typedef struct { int file_descriptor; uint32_t file_length; + uint32_t num_pages; void* pages[TABLE_MAX_PAGES]; - }; - typedef struct Pager_t Pager; + } Pager; - struct Table_t { + typedef struct { Pager* pager; - uint32_t num_rows; + uint32_t root_page_num; - }; - typedef struct Table_t Table; + } Table; - struct Cursor_t { + typedef struct { Table* table; - uint32_t row_num; + uint32_t page_num; + uint32_t cell_num; bool end_of_table; // Indicates a position one past the last element - }; - typedef struct Cursor_t Cursor; + } Cursor; -+enum NodeType_t { NODE_INTERNAL, NODE_LEAF }; -+typedef enum NodeType_t NodeType; ++typedef enum { NODE_INTERNAL, NODE_LEAF } NodeType; + +/* + * Common Node Header Layout diff --git a/db.c b/db.c index ec13f75..f58aad7 100644 --- a/db.c +++ b/db.c @@ -7,51 +7,44 @@ #include #include -struct InputBuffer_t { +typedef struct { char* buffer; size_t buffer_length; ssize_t input_length; -}; -typedef struct InputBuffer_t InputBuffer; +} InputBuffer; -enum ExecuteResult_t { +typedef enum { EXECUTE_SUCCESS, EXECUTE_DUPLICATE_KEY, -}; -typedef enum ExecuteResult_t ExecuteResult; +} ExecuteResult; -enum MetaCommandResult_t { +typedef enum { META_COMMAND_SUCCESS, META_COMMAND_UNRECOGNIZED_COMMAND -}; -typedef enum MetaCommandResult_t MetaCommandResult; +} MetaCommandResult; -enum PrepareResult_t { +typedef enum { PREPARE_SUCCESS, PREPARE_NEGATIVE_ID, PREPARE_STRING_TOO_LONG, PREPARE_SYNTAX_ERROR, PREPARE_UNRECOGNIZED_STATEMENT -}; -typedef enum 
PrepareResult_t PrepareResult; +} PrepareResult; -enum StatementType_t { STATEMENT_INSERT, STATEMENT_SELECT }; -typedef enum StatementType_t StatementType; +typedef enum { STATEMENT_INSERT, STATEMENT_SELECT } StatementType; #define COLUMN_USERNAME_SIZE 32 #define COLUMN_EMAIL_SIZE 255 -struct Row_t { +typedef struct { uint32_t id; char username[COLUMN_USERNAME_SIZE + 1]; char email[COLUMN_EMAIL_SIZE + 1]; -}; -typedef struct Row_t Row; +} Row; -struct Statement_t { +typedef struct { StatementType type; Row row_to_insert; // only used by insert statement -}; -typedef struct Statement_t Statement; +} Statement; #define size_of_attribute(Struct, Attribute) sizeof(((Struct*)0)->Attribute) @@ -66,34 +59,30 @@ const uint32_t ROW_SIZE = ID_SIZE + USERNAME_SIZE + EMAIL_SIZE; const uint32_t PAGE_SIZE = 4096; #define TABLE_MAX_PAGES 100 -struct Pager_t { +typedef struct { int file_descriptor; uint32_t file_length; uint32_t num_pages; void* pages[TABLE_MAX_PAGES]; -}; -typedef struct Pager_t Pager; +} Pager; -struct Table_t { +typedef struct { Pager* pager; uint32_t root_page_num; -}; -typedef struct Table_t Table; +} Table; -struct Cursor_t { +typedef struct { Table* table; uint32_t page_num; uint32_t cell_num; bool end_of_table; // Indicates a position one past the last element -}; -typedef struct Cursor_t Cursor; +} Cursor; void print_row(Row* row) { printf("(%d, %s, %s)\n", row->id, row->username, row->email); } -enum NodeType_t { NODE_INTERNAL, NODE_LEAF }; -typedef enum NodeType_t NodeType; +typedef enum { NODE_INTERNAL, NODE_LEAF } NodeType; /* * Common Node Header Layout