Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add macho parser for use by C inject_hash #1435

Merged
merged 36 commits into from
Apr 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
2d64aa3
add macho parser
Jan 11, 2024
0a5d6ee
add infra for macho parser tests
Jan 11, 2024
b9048b5
add macho testing file
Jan 11, 2024
8ae5e57
get tests working
Jan 12, 2024
12a1afe
more work on tests
Jan 12, 2024
a6f3890
get read macho file test working
Jan 18, 2024
bf18e42
rework macho free function
Jan 18, 2024
996a19e
remove unused macho print helper function
Jan 18, 2024
6b2b12d
implement get_macho_section_data test
Jan 18, 2024
2df5ec7
fix symbol table not being read correctly sometimes
Jan 19, 2024
d27dfa0
add working symbol table test
Jan 19, 2024
166ed73
refactor string table
Jan 19, 2024
cff1be9
read index correctly in test
Jan 22, 2024
2195181
remove more camelcase
Jan 25, 2024
c25ce90
add better failure handling to macho parser
Jan 25, 2024
27bb119
remove more camel case
Jan 25, 2024
4ed681e
move global variables and defines into main function
Jan 25, 2024
7bb4180
remove todos
Jan 25, 2024
093f28d
start convert C tests to C++ tests utilizing google test
Feb 6, 2024
d629c08
transfer remaining tests and remove legacy test file
Feb 6, 2024
24dd3d0
clean up code
Feb 6, 2024
42f007c
add missing copyright
Feb 6, 2024
0adaeda
add macho parser tests to run_tests target
Feb 6, 2024
622c593
use calloc ensure correct contents of memcmp data
Feb 9, 2024
70b639a
address PR comments
Feb 21, 2024
1d87e45
remove typedef aliases
Feb 27, 2024
2d33602
add section_index counting
Feb 27, 2024
1d1a567
programmatically find symbol indices in expected string table
Feb 28, 2024
f507f51
avoid using memcpy to assign string to arrays
Feb 28, 2024
15f8083
avoid memory leakage in tests
Feb 28, 2024
01cb226
correct return value
Feb 28, 2024
a564221
add missing error handling
Mar 14, 2024
be280e9
use set indices for sections we're looking for
Mar 14, 2024
638ff8b
remove unnecessary macro
Mar 14, 2024
d9e3234
add comment explaining why load_command search works
Mar 21, 2024
a9a1d4c
use size_t where appropriate
Apr 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1027,6 +1027,17 @@ if(BUILD_TESTING)
add_custom_target(fips_specific_tests_if_any)
endif()

# Add macho parser tests if FIPS and on MacOS
if(FIPS AND APPLE)
torben-hansen marked this conversation as resolved.
Show resolved Hide resolved
add_custom_target(
macho_parser_tests
justsmth marked this conversation as resolved.
Show resolved Hide resolved
COMMAND ./util/fipstools/inject_hash/macho_parser/tests/test_macho_parser
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
DEPENDS test_macho_parser
)
add_dependencies(fips_specific_tests_if_any macho_parser_tests)
endif()

# Read util/go_tests.txt into a CMake variable.
file(READ util/go_tests.txt GO_TESTS)
foreach(fips_specific_test ${GO_FIPS_TESTS})
Expand Down
2 changes: 2 additions & 0 deletions util/fipstools/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ if(FIPS AND BUILD_TESTING)
)
target_link_libraries(test_fips crypto)
target_include_directories(test_fips BEFORE PRIVATE ${PROJECT_BINARY_DIR}/symbol_prefix_include)

add_subdirectory(inject_hash/macho_parser/tests)
endif()
17 changes: 17 additions & 0 deletions util/fipstools/inject_hash/macho_parser/common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC

#ifndef COMMON_H
#define COMMON_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// LOG_ERROR writes one diagnostic line to stderr: the source file and line of
// the call site, followed by the printf-style message built from the macro
// arguments. The do/while(0) wrapper lets the macro be used as a single
// statement, including inside an unbraced if/else.
#define LOG_ERROR(...)                                               \
    do {                                                             \
        fprintf(stderr, "File: %s, Line: %d, ", __FILE__, __LINE__); \
        fprintf(stderr, __VA_ARGS__);                                \
        fputs("\n", stderr);                                         \
    } while (0)

#endif
219 changes: 219 additions & 0 deletions util/fipstools/inject_hash/macho_parser/macho_parser.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC

#include <stdint.h>

#include "common.h"
#include "macho_parser.h"

#define TEXT_INDEX 0
#define CONST_INDEX 1
#define SYMTABLE_INDEX 2
#define STRTABLE_INDEX 3

// Documentation for the Mach-O structs can be found in mach-o/loader.h and mach-o/nlist.h
int read_macho_file(const char *filename, machofile *macho) {
FILE *file = NULL;
struct load_command *load_commands = NULL;
uint32_t bytes_read;
int ret = 0;

file = fopen(filename, "rb");
if (file == NULL) {
LOG_ERROR("Error opening file %s", filename);
goto end;
}

bytes_read = fread(&macho->macho_header, 1, sizeof(struct mach_header_64), file);
if (bytes_read != sizeof(struct mach_header_64)) {
LOG_ERROR("Error reading macho_header from file %s", filename);
goto end;
}
if (macho->macho_header.magic != MH_MAGIC_64) {
LOG_ERROR("File is not a 64-bit Mach-O file");
goto end;
}

load_commands = malloc(macho->macho_header.sizeofcmds);
if (load_commands == NULL) {
LOG_ERROR("Error allocating memory for load_commands");
goto end;
}
bytes_read = fread(load_commands, 1, macho->macho_header.sizeofcmds, file);
if (bytes_read != macho->macho_header.sizeofcmds) {
LOG_ERROR("Error reading load commands from file %s", filename);
goto end;
}

// We're only looking for __text, __const in the __TEXT segment, and the string & symbol tables
macho->num_sections = 4;
macho->sections = malloc(macho->num_sections * sizeof(section_info));
if (macho->sections == NULL) {
LOG_ERROR("Error allocating memory for macho sections");
}

int text_found = 0;
int const_found = 0;
int symtab_found = 0;

// mach-o/loader.h explains that cmdsize (and by extension sizeofcmds) must be a multiple of 8 on 64-bit systems. struct load_command will always be 8 bytes.
for (size_t i = 0; i < macho->macho_header.sizeofcmds / sizeof(struct load_command); i += load_commands[i].cmdsize / sizeof(struct load_command)) {
if (load_commands[i].cmd == LC_SEGMENT_64) {
struct segment_command_64 *segment = (struct segment_command_64 *)&load_commands[i];
if (strcmp(segment->segname, "__TEXT") == 0) {
struct section_64 *sections = (struct section_64 *)&segment[1];
for (size_t j = 0; j < segment->nsects; j++) {
if (strcmp(sections[j].sectname, "__text") == 0) {
if (text_found == 1) {
LOG_ERROR("Duplicate __text section found");
goto end;
}
macho->sections[TEXT_INDEX].offset = sections[j].offset;
macho->sections[TEXT_INDEX].size = sections[j].size;
strcpy(macho->sections[TEXT_INDEX].name, sections[j].sectname);
text_found = 1;
} else if (strcmp(sections[j].sectname, "__const") == 0) {
if (const_found == 1) {
LOG_ERROR("Duplicate __const section found");
goto end;
}
macho->sections[CONST_INDEX].offset = sections[j].offset;
macho->sections[CONST_INDEX].size = sections[j].size;
strcpy(macho->sections[CONST_INDEX].name, sections[j].sectname);
const_found = 1;
}
}
}
} else if (load_commands[i].cmd == LC_SYMTAB) {
if (symtab_found == 1) {
LOG_ERROR("Duplicate symbol and string tables found");
goto end;
}
struct symtab_command *symtab = (struct symtab_command *)&load_commands[i];
macho->sections[SYMTABLE_INDEX].offset = symtab->symoff;
macho->sections[SYMTABLE_INDEX].size = symtab->nsyms * sizeof(struct nlist_64);
strcpy(macho->sections[SYMTABLE_INDEX].name, "__symbol_table");
macho->sections[STRTABLE_INDEX].offset = symtab->stroff;
macho->sections[STRTABLE_INDEX].size = symtab->strsize;
strcpy(macho->sections[STRTABLE_INDEX].name, "__string_table");
symtab_found = 1;
}
}

ret = 1;
end:
justsmth marked this conversation as resolved.
Show resolved Hide resolved
free(load_commands);
if (file != NULL) {
fclose(file);
}
return ret;
}

// free_macho_file releases the sections array owned by |macho| and then
// |macho| itself, so |macho| must point to heap-allocated storage. Passing
// NULL is a no-op. The caller's pointer is dangling afterwards; the function
// receives it by value and cannot reset it.
void free_macho_file(machofile *macho) {
    if (macho == NULL) {
        return;
    }
    free(macho->sections);
    free(macho);
}

// get_macho_section_data reads the bytes of one region recorded in |macho|
// from the file |filename|. |section_name| must be one of "__text", "__const",
// "__symbol_table" or "__string_table".
//
// On success it returns a malloc'd buffer that the caller must release with
// free(), and, when the respective pointer is non-NULL, stores the region's
// size in |*size| and its file offset in |*offset|. On any failure it logs an
// error, returns NULL and leaves |*size| and |*offset| untouched.
uint8_t* get_macho_section_data(const char *filename, machofile *macho, const char *section_name, size_t *size, uint32_t *offset) {
    FILE *file = NULL;
    uint8_t *section_data = NULL;
    uint8_t *ret = NULL;
    const section_info *section = NULL;
    size_t section_index = 0;
    // size_t matches both fread's return type and section_info.size, so the
    // short-read check below cannot be fooled by truncation on large sections.
    size_t bytes_read = 0;

    if (filename == NULL || macho == NULL || macho->sections == NULL || section_name == NULL) {
        LOG_ERROR("Filename, machofile and section name cannot be null to get section data");
        goto end;
    }

    if (strcmp(section_name, "__text") == 0) {
        section_index = TEXT_INDEX;
    } else if (strcmp(section_name, "__const") == 0) {
        section_index = CONST_INDEX;
    } else if (strcmp(section_name, "__symbol_table") == 0) {
        section_index = SYMTABLE_INDEX;
    } else if (strcmp(section_name, "__string_table") == 0) {
        section_index = STRTABLE_INDEX;
    } else {
        LOG_ERROR("Getting invalid macho section data %s", section_name);
        goto end;
    }

    if (section_index >= macho->num_sections) {
        LOG_ERROR("Macho file has no entry for section %s", section_name);
        goto end;
    }
    section = &macho->sections[section_index];

    file = fopen(filename, "rb");
    if (file == NULL) {
        LOG_ERROR("Error opening file %s", filename);
        goto end;
    }

    section_data = malloc(section->size);
    if (section_data == NULL) {
        LOG_ERROR("Error allocating memory for section data");
        goto end;
    }

    if (fseek(file, (long)section->offset, SEEK_SET) != 0) {
        LOG_ERROR("Failed to seek in file %s", filename);
        goto end;
    }
    bytes_read = fread(section_data, 1, section->size, file);
    if (bytes_read != section->size) {
        LOG_ERROR("Error reading section data from file %s", filename);
        goto end;
    }

    if (size != NULL) {
        *size = section->size;
    }
    if (offset != NULL) {
        *offset = section->offset;
    }

    // Ownership moves to the caller; clear the local so the cleanup below
    // does not free the buffer being returned.
    ret = section_data;
    section_data = NULL;

end:
    free(section_data);
    if (file != NULL) {
        fclose(file);
    }
    return ret;
}

uint32_t find_macho_symbol_index(uint8_t *symbol_table_data, size_t symbol_table_size, uint8_t *string_table_data, size_t string_table_size, const char *symbol_name, uint32_t *base) {
char* string_table = NULL;
uint32_t ret = 0;

if (symbol_table_data == NULL || string_table_data == NULL) {
LOG_ERROR("Symbol and string table pointers cannot be null to find the symbol index");
goto end;
}

string_table = malloc(string_table_size);
justsmth marked this conversation as resolved.
Show resolved Hide resolved
if (string_table == NULL) {
LOG_ERROR("Error allocating memory for string table");
goto end;
}
memcpy(string_table, string_table_data, string_table_size);

int found = 0;
size_t index = 0;
for (size_t i = 0; i < symbol_table_size / sizeof(struct nlist_64); i++) {
struct nlist_64 *symbol = (struct nlist_64 *)(symbol_table_data + i * sizeof(struct nlist_64));
if (strcmp(symbol_name, &string_table[symbol->n_un.n_strx]) == 0) {
if (found == 0) {
index = symbol->n_value;
found = 1;
} else {
LOG_ERROR("Duplicate symbol %s found", symbol_name);
goto end;
}
}
}
if (found == 0) {
LOG_ERROR("Requested symbol %s not found", symbol_name);
goto end;
}
if (base != NULL) {
index = index - *base;
}
ret = index;

end:
free(string_table);
return ret;
}
45 changes: 45 additions & 0 deletions util/fipstools/inject_hash/macho_parser/macho_parser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC

#ifndef MACHO_PARSER_H
#define MACHO_PARSER_H
#ifdef __cplusplus
extern "C"
{
#endif

#include <mach-o/loader.h>
#include <mach-o/nlist.h>

// section_info describes one region of a Mach-O file that the parser tracks.
typedef struct {
// NUL-terminated name of the region: a section name copied from the file, or
// "__symbol_table" / "__string_table" for the two tables.
char name[16];
// Number of bytes the region occupies in the file.
size_t size;
// Byte offset of the region from the start of the file.
uint32_t offset;
} section_info;

// machofile holds what read_macho_file extracts from a Mach-O file.
typedef struct {
// The 64-bit Mach-O header as read from the beginning of the file.
struct mach_header_64 macho_header;
// Heap-allocated array of num_sections entries; released by free_macho_file.
section_info *sections;
// Number of entries in sections.
uint32_t num_sections;
} machofile;

// read_macho_file reads a Mach-O file [in] and populates a machofile struct [out] with its contents.
// It returns 0 on failure, 1 on success.
int read_macho_file(const char *filename, machofile *macho);
torben-hansen marked this conversation as resolved.
Show resolved Hide resolved

// free_macho_file frees the sections array of a machofile struct [in] and then the struct itself,
// so the struct must have been heap-allocated.
void free_macho_file(machofile *macho);

// get_macho_section_data retrieves data from a specific section [in] the provided Mach-O file [in].
// In addition to returning a pointer to the retrieved data, or NULL if it doesn't find said section,
// it also populates the size [out] & offset [out] pointers provided they are not NULL.
uint8_t* get_macho_section_data(const char* filename, machofile *macho, const char *section_name, size_t *size, uint32_t *offset);

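// find_macho_symbol_index looks up a symbol name [in] in the provided symbol table [in], resolving
// names through the provided string table [in]. On success it returns the symbol's value (n_value),
// reduced by *base when base [in] is not NULL. It returns 0 on failure, which includes the symbol
// being absent or appearing more than once.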
uint32_t find_macho_symbol_index(uint8_t *symbol_table_data, size_t symbol_table_size, uint8_t *string_table_data, size_t string_table_size, const char *symbol_name, uint32_t *base);

#ifdef __cplusplus
} // extern "C"
#endif
#endif
15 changes: 15 additions & 0 deletions util/fipstools/inject_hash/macho_parser/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Build the Mach-O parser test binary only for FIPS builds on Apple platforms.
if(FIPS AND APPLE)
# The parser source is compiled directly into the test executable.
add_executable(
test_macho_parser

macho_tests.cc
../macho_parser.c
)

# Link against the shared test support code and the gtest main entry point.
target_link_libraries(
test_macho_parser

test_support_lib
boringssl_gtest_main
)
endif()
66 changes: 66 additions & 0 deletions util/fipstools/inject_hash/macho_parser/tests/macho_tests.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC

#include <assert.h>

#include "../common.h"
#include "macho_tests.h"

#define TEST_FILE "test_macho"

// Out-of-class definitions for the static data members of MachoTestFixture
// (presumably declared in macho_tests.h -- confirm there).
machofile *MachoTestFixture::expected_macho;
struct nlist_64 *MachoTestFixture::expected_symtab;
uint32_t MachoTestFixture::expected_symbol1_ind;
uint32_t MachoTestFixture::expected_symbol2_ind;

// Namespace-scope definitions for the constexpr static arrays; their
// initializers are not repeated here.
constexpr char MachoTestFixture::expected_strtab[EXPECTED_STRTAB_SIZE];
constexpr int MachoTestFixture::text_data[TEXT_DATA_SIZE];
constexpr char MachoTestFixture::const_data[CONST_DATA_SIZE];

// Parses TEST_FILE with read_macho_file and checks that the header and the
// section table match the fixture's expected machofile.
TEST_F(MachoTestFixture, TestReadMachoFile) {
  // read_macho_file hands back a malloc'd sections array; release it with
  // free() when the test ends, whether or not the expectations hold.
  struct FreeDeleter {
    void operator()(section_info *ptr) const { free(ptr); }
  };

  machofile test_macho_file = {};
  // Stop here on a failed read; the comparisons below would otherwise inspect
  // a struct that was never populated.
  ASSERT_TRUE(read_macho_file(TEST_FILE, &test_macho_file)) << "Failed to read macho_file";
  std::unique_ptr<section_info, FreeDeleter> sections_guard(test_macho_file.sections);

  EXPECT_TRUE(memcmp(&test_macho_file.macho_header, &expected_macho->macho_header, sizeof(struct mach_header_64)) == 0);
  // The section tables can only be compared byte-for-byte when they have the
  // same number of entries.
  ASSERT_EQ(test_macho_file.num_sections, expected_macho->num_sections);
  ASSERT_TRUE(test_macho_file.sections != nullptr);
  EXPECT_TRUE(memcmp(test_macho_file.sections, expected_macho->sections, test_macho_file.num_sections * sizeof(section_info)) == 0);
}

// Reads each tracked region of TEST_FILE through get_macho_section_data and
// compares the bytes with the fixture's expected contents.
TEST_F(MachoTestFixture, TestGetMachoSectionData) {
  // get_macho_section_data returns malloc'd memory, so it has to be released
  // with free(); std::unique_ptr's default deleter would call delete on it.
  struct FreeDeleter {
    void operator()(uint8_t *ptr) const { free(ptr); }
  };
  using SectionData = std::unique_ptr<uint8_t, FreeDeleter>;

  size_t text_section_size = 0;
  size_t const_section_size = 0;
  size_t symbol_table_size = 0;
  size_t string_table_size = 0;

  SectionData text_section(get_macho_section_data(TEST_FILE, expected_macho, "__text", &text_section_size, nullptr));
  SectionData const_section(get_macho_section_data(TEST_FILE, expected_macho, "__const", &const_section_size, nullptr));
  SectionData symbol_table(get_macho_section_data(TEST_FILE, expected_macho, "__symbol_table", &symbol_table_size, nullptr));
  SectionData string_table(get_macho_section_data(TEST_FILE, expected_macho, "__string_table", &string_table_size, nullptr));

  // A failed read returns NULL and leaves the size untouched; do not pass
  // that on to memcmp.
  ASSERT_TRUE(text_section != nullptr);
  ASSERT_TRUE(const_section != nullptr);
  ASSERT_TRUE(symbol_table != nullptr);
  ASSERT_TRUE(string_table != nullptr);

  ASSERT_TRUE(memcmp(text_section.get(), text_data, text_section_size) == 0);
  ASSERT_TRUE(memcmp(const_section.get(), const_data, const_section_size) == 0);
  ASSERT_TRUE(memcmp(symbol_table.get(), expected_symtab, symbol_table_size) == 0);
  ASSERT_TRUE(memcmp(string_table.get(), expected_strtab, string_table_size) == 0);
}

// Loads the symbol and string tables of TEST_FILE and checks that looking up
// "symbol1" yields the fixture's expected value.
TEST_F(MachoTestFixture, TestFindMachoSymbolIndex) {
  // get_macho_section_data returns malloc'd memory, so it has to be released
  // with free(); std::unique_ptr's default deleter would call delete on it.
  struct FreeDeleter {
    void operator()(uint8_t *ptr) const { free(ptr); }
  };
  using SectionData = std::unique_ptr<uint8_t, FreeDeleter>;

  size_t symbol_table_size = 0;
  size_t string_table_size = 0;

  SectionData symbol_table(get_macho_section_data(TEST_FILE, expected_macho, "__symbol_table", &symbol_table_size, nullptr));
  SectionData string_table(get_macho_section_data(TEST_FILE, expected_macho, "__string_table", &string_table_size, nullptr));

  // Without both tables the lookup below could only report failure.
  ASSERT_TRUE(symbol_table != nullptr);
  ASSERT_TRUE(string_table != nullptr);

  uint32_t symbol1_index = find_macho_symbol_index(symbol_table.get(), symbol_table_size, string_table.get(), string_table_size, "symbol1", nullptr);

  ASSERT_EQ(symbol1_index, expected_symbol1_ind);
}
Loading
Loading