From d1813710eddb1f5a1d260726ae15e48c1cae31c0 Mon Sep 17 00:00:00 2001 From: Swaroop Sridhar Date: Tue, 9 Apr 2019 12:46:07 -0700 Subject: [PATCH] Address some feedback by @vitek-karas * Use spaces instead of tabs for indentation * Fix trace messages * Fix some rebasing problems * Improve pal::get_temp_dir() on windows. * Fix a few minor issues (function ordering etc) * Read/Write file extraction in 8KB chunks * Also fix a bug in the extractor for a similar issue. * Rename bundle processor to bundle runner --- src/corehost/cli/bdl_file_entry.cpp | 67 +-- src/corehost/cli/bdl_file_entry.h | 79 ++- src/corehost/cli/bdl_file_type.h | 37 +- src/corehost/cli/bdl_manifest.cpp | 109 ++--- src/corehost/cli/bdl_manifest.h | 156 +++--- src/corehost/cli/bdl_processor.cpp | 448 +++++++++++------- src/corehost/cli/bdl_processor.h | 97 ++-- src/corehost/common/pal.h | 35 +- src/corehost/common/pal.unix.cpp | 9 +- src/corehost/common/pal.windows.cpp | 11 +- src/corehost/corehost.cpp | 12 +- .../Microsoft.NET.Build.Bundle/Extractor.cs | 2 +- 12 files changed, 566 insertions(+), 496 deletions(-) diff --git a/src/corehost/cli/bdl_file_entry.cpp b/src/corehost/cli/bdl_file_entry.cpp index 15276a89a8..3dc75433f5 100644 --- a/src/corehost/cli/bdl_file_entry.cpp +++ b/src/corehost/cli/bdl_file_entry.cpp @@ -2,49 +2,50 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
-#ifdef FEATURE_APPHOST - #include "bdl_processor.h" #include "pal.h" #include "error_codes.h" #include "trace.h" #include "utils.h" +using namespace bundle; + bool file_entry_t::is_valid() { - return data.offset > 0 && data.size > 0 && - (file_type_t)data.type < file_type_t::END && - data.path_length > 0 && data.path_length <= PATH_MAX; + return data.offset > 0 && data.size > 0 && + (file_type_t)data.type < file_type_t::__last && + data.path_length > 0 && data.path_length <= PATH_MAX; } -file_entry_t* file_entry_t::read(FILE* bundle) +file_entry_t* file_entry_t::read(FILE* stream) { - file_entry_t* entry = new file_entry_t(); - - // First read the fixed-sized portion of file-entry - bdl_processor_t::read(&entry->data, sizeof(entry->data), bundle); - if (!entry->is_valid()) - { - trace::error(_X("Invalid FileEntry")); - throw StatusCode::BundleExtractionFailure; - } - - // Read the relative-path, given its length - pal::string_t& path = entry->relative_path; - bdl_processor_t::read_string(path, entry->data.path_length, bundle); - - // Fixup the relative-path to have current platform's directory separator. - if (bundle_dir_separator != DIR_SEPARATOR) - { - for(size_t pos = path.find(bundle_dir_separator); - pos != pal::string_t::npos; - pos = path.find(bundle_dir_separator, pos)) - { - path[pos] = DIR_SEPARATOR; - } - } - - return entry; + file_entry_t* entry = new file_entry_t(); + + // First read the fixed-sized portion of file-entry + bundle_runner_t::read(&entry->data, sizeof(entry->data), stream); + if (!entry->is_valid()) + { + trace::error(_X("Failure processing application bundle; possible file corruption.")); + trace::error(_X("Invalid FileEntry detected.")); + throw StatusCode::BundleExtractionFailure; + } + + // Read the relative-path, given its length + pal::string_t& path = entry->relative_path; + bundle_runner_t::read_string(path, entry->data.path_length, stream); + + // Fixup the relative-path to have current platform's directory separator. 
+ if (bundle_dir_separator != DIR_SEPARATOR) + { + for (size_t pos = path.find(bundle_dir_separator); + pos != pal::string_t::npos; + pos = path.find(bundle_dir_separator, pos)) + { + path[pos] = DIR_SEPARATOR; + } + } + + return entry; } -#endif // FEATURE_APPHOST + diff --git a/src/corehost/cli/bdl_file_entry.h b/src/corehost/cli/bdl_file_entry.h index f1de2aa9b0..98faa0231e 100644 --- a/src/corehost/cli/bdl_file_entry.h +++ b/src/corehost/cli/bdl_file_entry.h @@ -5,55 +5,52 @@ #ifndef __BDL_FILE_ENTRY_H__ #define __BDL_FILE_ENTRY_H__ -#if FEATURE_APPHOST - #include #include "bdl_file_type.h" #include "pal.h" -#pragma pack(push, 1) - -// FileEntry: Records information about embedded files. -// -// The bundle manifest records the following meta-data for each -// file embedded in the bundle: -// Fixed size portion (represented by file_entry_inner_t) -// - Offset -// - Size -// - File Entry Type -// - path-length (7-bit extension encoding, 1 Byte due to MAX_PATH) -// Variable Size portion -// - relative path ("path-length" Bytes) - -class file_entry_t +namespace bundle { -public: - - // The inner structure represents the fields that can be - // read contiguously for every file_entry. - struct file_entry_inner_t - { - int64_t offset; - int64_t size; - file_type_t type; - int8_t path_length; - } data; - pal::string_t relative_path; // Path of an embedded file, relative to the extraction directory. - - file_entry_t() - :data(), relative_path() - { - } - static file_entry_t* read(FILE* bundle); - -private: - static const pal::char_t bundle_dir_separator = '/'; - bool is_valid(); -}; + // FileEntry: Records information about embedded files. 
+ // + // The bundle manifest records the following meta-data for each + // file embedded in the bundle: + // Fixed size portion (represented by file_entry_inner_t) + // - Offset + // - Size + // - File Entry Type + // - path-length (7-bit extension encoding, 1 Byte due to MAX_PATH) + // Variable Size portion + // - relative path ("path-length" Bytes) + + class file_entry_t + { + public: + // The inner structure represents the fields that can be + // read contiguously for every file_entry. +#pragma pack(push, 1) + struct + { + int64_t offset; + int64_t size; + file_type_t type; + int8_t path_length; + } data; #pragma pack(pop) + pal::string_t relative_path; // Path of an embedded file, relative to the extraction directory. + + file_entry_t() + :data(), relative_path() + { + } -#endif // FEATURE_APPHOST + static file_entry_t* read(FILE* stream); + private: + static const pal::char_t bundle_dir_separator = '/'; + bool is_valid(); + }; +} #endif // __BDL_FILE_ENTRY_H__ diff --git a/src/corehost/cli/bdl_file_type.h b/src/corehost/cli/bdl_file_type.h index 174a22d5d5..15b577ccba 100644 --- a/src/corehost/cli/bdl_file_type.h +++ b/src/corehost/cli/bdl_file_type.h @@ -5,28 +5,27 @@ #ifndef __BDL_FILE_TYPE_H__ #define __BDL_FILE_TYPE_H__ -#if FEATURE_APPHOST #include -// FileType: Identifies the type of file embedded into the bundle. -// -// The bundler differentiates a few kinds of files via the manifest, -// with respect to the way in which they'll be used by the runtime. -// -// Currently all files are extracted out to the disk, but future -// implementations will process certain file_types directly from the bundle. - -enum file_type_t : uint8_t +namespace bundle { - assembly, - ready2run, - deps_json, - runtime_config_json, - extract, - END -}; - + // FileType: Identifies the type of file embedded into the bundle. + // + // The bundler differentiates a few kinds of files via the manifest, + // with respect to the way in which they'll be used by the runtime. 
+ // + // Currently all files are extracted out to the disk, but future + // implementations will process certain file_types directly from the bundle. -#endif // FEATURE_APPHOST + enum file_type_t : uint8_t + { + assembly, + ready2run, + deps_json, + runtime_config_json, + extract, + __last + }; +} #endif // __BDL_FILE_TYPE_H__ diff --git a/src/corehost/cli/bdl_manifest.cpp b/src/corehost/cli/bdl_manifest.cpp index 2125d0bdd3..50c2fa15e6 100644 --- a/src/corehost/cli/bdl_manifest.cpp +++ b/src/corehost/cli/bdl_manifest.cpp @@ -2,80 +2,83 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -#ifdef FEATURE_APPHOST - #include "bdl_processor.h" #include "pal.h" #include "error_codes.h" #include "trace.h" #include "utils.h" -const char *manifest_footer_t::m_expected_signature = ".NetCoreBundle"; +using namespace bundle; -bool manifest_footer_t::is_valid() +bool manifest_header_t::is_valid() { - return header_offset > 0 && - signature_length == 14 && - strcmp(signature, m_expected_signature) == 0; + return m_data.major_version == m_current_major_version && + m_data.minor_version == m_current_minor_version && + m_data.num_embedded_files > 0 && + m_data.bundle_id_length > 0 && + m_data.bundle_id_length < PATH_MAX; } -manifest_footer_t* manifest_footer_t::read(FILE* bundle) +manifest_header_t* manifest_header_t::read(FILE* stream) { - manifest_footer_t* footer = new manifest_footer_t(); - - bdl_processor_t::read(footer, num_bytes_read(), bundle); - - if (!footer->is_valid()) - { - trace::info(_X("Manifest footer Invalid")); - throw StatusCode::AppHostExeNotBundle; - } - - return footer; + manifest_header_t* header = new manifest_header_t(); + + // First read the fixed size portion of the header + bundle_runner_t::read(&header->m_data, sizeof(header->m_data), stream); + if (!header->is_valid()) + { + trace::error(_X("Failure processing application bundle.")); + trace::error(_X("Manifest 
header version compatibility check failed")); + + throw StatusCode::BundleExtractionFailure; + } + + // Next read the bundle-ID string, given its length + bundle_runner_t::read_string(header->m_bundle_id, + header->m_data.bundle_id_length, stream); + + return header; } -bool manifest_header_t::is_valid() +const char* manifest_footer_t::m_expected_signature = ".NetCoreBundle"; + +bool manifest_footer_t::is_valid() { - return data.major_version == m_current_major_version && - data.minor_version == m_current_minor_version && - data.num_embedded_files > 0 && - data.bundle_id_length > 0 && data.bundle_id_length < PATH_MAX; + return m_header_offset > 0 && + m_signature_length == 14 && + strcmp(m_signature, m_expected_signature) == 0; } -manifest_header_t* manifest_header_t::read(FILE* bundle) +manifest_footer_t* manifest_footer_t::read(FILE* stream) { - manifest_header_t* header = new manifest_header_t(); - - // First read the fixed size portion of the header - bdl_processor_t::read(&header->data, sizeof(header->data), bundle); - if (!header->is_valid()) - { - trace::error(_X("Manifest header incompatible")); - throw StatusCode::BundleExtractionFailure; - } - - // Next read the bundle-ID string, given its length - bdl_processor_t::read_string(header->bundle_id, header->data.bundle_id_length, bundle); - - return header; + manifest_footer_t* footer = new manifest_footer_t(); + + bundle_runner_t::read(footer, num_bytes_read(), stream); + + if (!footer->is_valid()) + { + trace::info(_X("This executable is not recognized as a bundle.")); + + throw StatusCode::AppHostExeNotBundle; + } + + return footer; } -manifest_t* manifest_t::read(FILE* bundle, int32_t num_files) +manifest_t* manifest_t::read(FILE* stream, int32_t num_files) { - manifest_t* manifest = new manifest_t(); + manifest_t* manifest = new manifest_t(); - for (int32_t i = 0; i < num_files; i++) - { - file_entry_t* entry = file_entry_t::read(bundle); - if (entry == nullptr) - { - return nullptr; - } + for (int32_t i 
= 0; i < num_files; i++) + { + file_entry_t* entry = file_entry_t::read(stream); + if (entry == nullptr) + { + return nullptr; + } - manifest->files.push_back(entry); - } + manifest->files.push_back(entry); + } - return manifest; + return manifest; } - -#endif // FEATURE_APPHOST diff --git a/src/corehost/cli/bdl_manifest.h b/src/corehost/cli/bdl_manifest.h index afc86fa5f2..a94e9aba5d 100644 --- a/src/corehost/cli/bdl_manifest.h +++ b/src/corehost/cli/bdl_manifest.h @@ -5,97 +5,97 @@ #ifndef __BDL_MANIFEST_H__ #define __BDL_MANIFEST_H__ -#if FEATURE_APPHOST - #include #include #include "bdl_file_entry.h" -#pragma pack(push, 1) - -// Manifest Header contains: -// Fixed size thunk (represened by manifest_header_inner_t) -// - Major Version -// - Minor Version -// - Number of embedded files -// - Bundle ID length -// Variable size portion: -// - Bundle ID ("Bundle ID length" bytes) - -struct manifest_header_t +namespace bundle { -public: - struct manifest_header_inner_t - { - uint32_t major_version; - uint32_t minor_version; - int32_t num_embedded_files; - int8_t bundle_id_length; - } data; - pal::string_t bundle_id; - - manifest_header_t() - :data(), bundle_id() - { - } - - static manifest_header_t* read(FILE* bundle); - -private: - bool is_valid(); - - static const uint32_t m_current_major_version = 0; - static const uint32_t m_current_minor_version = 1; -}; - -// Manifest Footer contains: -// Manifest header offset -// Length-prefixed non-null terminated Bundle Signature ".NetCoreBundle" -struct manifest_footer_t -{ -public: - int64_t header_offset; - uint8_t signature_length; - char signature[15]; - - manifest_footer_t() - :header_offset(0), signature_length(0) + // Manifest Header contains: + // Fixed size thunk (represened by manifest_header_inner_t) + // - Major Version + // - Minor Version + // - Number of embedded files + // - Bundle ID length + // Variable size portion: + // - Bundle ID ("Bundle ID length" bytes) + + struct manifest_header_t { - // The 
signature string is not null-terminated as read from disk. - // We add an additional character for null termination - signature[14] = 0; - } + public: + manifest_header_t() + :m_data(), m_bundle_id() + { + } + + bool is_valid(); + static manifest_header_t* read(FILE* stream); + const pal::string_t& bundle_id() { return m_bundle_id; } + int32_t num_embedded_files() { return m_data.num_embedded_files; } + + private: +#pragma pack(push, 1) + struct + { + uint32_t major_version; + uint32_t minor_version; + int32_t num_embedded_files; + int8_t bundle_id_length; + } m_data; +#pragma pack(pop) + pal::string_t m_bundle_id; - static manifest_footer_t* read(FILE* bundle); + static const uint32_t m_current_major_version = 0; + static const uint32_t m_current_minor_version = 1; + }; - static size_t num_bytes_read() + // Manifest Footer contains: + // Manifest header offset + // Length-prefixed non-null terminated Bundle Signature ".NetCoreBundle" +#pragma pack(push, 1) + struct manifest_footer_t { - return sizeof(manifest_footer_t) - 1; - } - -private: - bool is_valid(); - - static const char* m_expected_signature; -}; - + manifest_footer_t() + :m_header_offset(0), m_signature_length(0) + { + // The signature string is not null-terminated as read from disk. 
+ // We add an additional character for null termination + m_signature[14] = 0; + } + + bool is_valid(); + static manifest_footer_t* read(FILE* stream); + int64_t manifest_header_offset() { return m_header_offset; } + static size_t num_bytes_read() + { + return sizeof(manifest_footer_t) - 1; + } + + private: + int64_t m_header_offset; + uint8_t m_signature_length; + char m_signature[15]; + + private: + + static const char* m_expected_signature; + }; #pragma pack(pop) -// Bundle Manifest contains: -// Series of file entries (for each embedded file) -class manifest_t -{ -public: - manifest_t() - :files() - {} - - std::list files; + // Bundle Manifest contains: + // Series of file entries (for each embedded file) - static manifest_t* read(FILE *host, int32_t num_files); -}; + class manifest_t + { + public: + manifest_t() + :files() + {} -#endif // FEATURE_APPHOST + std::list files; + static manifest_t* read(FILE* host, int32_t num_files); + }; +} #endif // __BDL_MANIFEST_H__ diff --git a/src/corehost/cli/bdl_processor.cpp b/src/corehost/cli/bdl_processor.cpp index ade1f8d464..7f4a16acab 100644 --- a/src/corehost/cli/bdl_processor.cpp +++ b/src/corehost/cli/bdl_processor.cpp @@ -2,244 +2,328 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
-#ifdef FEATURE_APPHOST - #include "bdl_processor.h" #include "pal.h" #include "trace.h" #include "utils.h" -void bdl_processor_t::seek(long offset, int origin) +using namespace bundle; + +void bundle_runner_t::seek(long offset, int origin) { - if (fseek(m_bundle, offset, origin) != 0) - { - trace::error(_X("Bundle extraction: Seek failure.")); - throw StatusCode::BundleExtractionIOError; - } + if (fseek(m_bundle_stream, offset, origin) != 0) + { + trace::error(_X("Failure processing application bundle; possible file corruption.")); + trace::error(_X("I/O seek failure within the bundle.")); + throw StatusCode::BundleExtractionIOError; + } } -void bdl_processor_t::write(const void* buf, size_t size, FILE *stream) +void bundle_runner_t::write(const void* buf, size_t size, FILE *stream) { - if (fwrite(buf, 1, size, stream) != size) - { - trace::error(_X("Bundle extraction: Write failure.")); - throw StatusCode::BundleExtractionIOError; - } + if (fwrite(buf, 1, size, stream) != size) + { + trace::error(_X("Failure extracting contents of the application bundle.")); + trace::error(_X("I/O failure when writing extracted files.")); + throw StatusCode::BundleExtractionIOError; + } } -void bdl_processor_t::read(void* buf, size_t size, FILE* stream) +void bundle_runner_t::read(void* buf, size_t size, FILE* stream) { - if (fread(buf, 1, size, stream) != size) - { - trace::error(_X("Bundle extraction: Read failure.")); - throw StatusCode::BundleExtractionIOError; - } + if (fread(buf, 1, size, stream) != size) + { + trace::error(_X("Failure processing application bundle; possible file corruption.")); + trace::error(_X("I/O failure reading contents of the bundle.")); + throw StatusCode::BundleExtractionIOError; + } } // Read a non-null terminated fixed length UTF8 string from a byte-stream // and transform it to pal::string_t -void bdl_processor_t::read_string(pal::string_t &str, size_t size, FILE* stream) +void bundle_runner_t::read_string(pal::string_t &str, size_t size, FILE* 
stream) { - uint8_t *buffer = new uint8_t[size + 1]; - read(buffer, size, stream); - buffer[size] = 0; // null-terminator - pal::clr_palstring((const char*)buffer, &str); + uint8_t *buffer = new uint8_t[size + 1]; + read(buffer, size, stream); + buffer[size] = 0; // null-terminator + pal::clr_palstring((const char*)buffer, &str); } -void bdl_processor_t::reopen_host_for_reading() +static bool has_dirs_in_path(const pal::string_t& path) { - m_bundle = pal::file_open(m_bundle_path, _X("rb")); - if (m_bundle == nullptr) - { - trace::error(_X("Host file descriptor invalid")); - throw StatusCode::BundleExtractionIOError; - } + return path.find_last_of(DIR_SEPARATOR) != pal::string_t::npos; } -void bdl_processor_t::process_manifest_footer(int64_t &header_offset) +static void create_directory_tree(const pal::string_t &path) { - seek(-manifest_footer_t::num_bytes_read(), SEEK_END); - - manifest_footer_t* footer = manifest_footer_t::read(m_bundle); - header_offset = footer->header_offset; + if (path.empty()) + { + return; + } + + if (pal::directory_exists(path)) + { + return; + } + + if (has_dirs_in_path(path)) + { + create_directory_tree(get_directory(path)); + } + + if (!pal::mkdir(path.c_str(), 0700)) + { + if (pal::directory_exists(path)) + { + // The directory was created since we last checked. 
+ return; + } + + trace::error(_X("Failure processing application bundle.")); + trace::error(_X("Failed to create directory [%s] for extracting bundled files"), path.c_str()); + throw StatusCode::BundleExtractionIOError; + } } - -void bdl_processor_t::process_manifest_header(int64_t header_offset) +static void remove_directory_tree(const pal::string_t& path) { - seek(header_offset, SEEK_SET); - - manifest_header_t* header = manifest_header_t::read(m_bundle); - - m_num_embedded_files = header->data.num_embedded_files; - m_bundle_id = header->bundle_id; + if (path.empty()) + { + return; + } + + std::vector dirs; + pal::readdir_onlydirectories(path, &dirs); + + for (pal::string_t dir : dirs) + { + remove_directory_tree(dir); + } + + std::vector files; + pal::readdir(path, &files); + + for (pal::string_t file : files) + { + if (!pal::remove(file.c_str())) + { + trace::error(_X("Error removing file [%s]"), file.c_str()); + throw StatusCode::BundleExtractionIOError; + } + } + + if (!pal::rmdir(path.c_str())) + { + trace::error(_X("Error removing directory [%s]"), path.c_str()); + throw StatusCode::BundleExtractionIOError; + } } -static bool has_dirs_in_path(const pal::string_t& path) +void bundle_runner_t::reopen_host_for_reading() { - return path.find_last_of(DIR_SEPARATOR) != pal::string_t::npos; + m_bundle_stream = pal::file_open(m_bundle_path, _X("rb")); + if (m_bundle_stream == nullptr) + { + trace::error(_X("Failure processing application bundle.")); + trace::error(_X("Couldn't open host binary for reading contents")); + throw StatusCode::BundleExtractionIOError; + } } -static void create_directory_tree(const pal::string_t &path) +void bundle_runner_t::process_manifest_footer(int64_t &header_offset) { - if (path.empty()) - { - return; - } - - if (pal::directory_exists(path)) - { - return; - } - - if (has_dirs_in_path(path)) - { - create_directory_tree(get_directory(path)); - } + seek(-manifest_footer_t::num_bytes_read(), SEEK_END); - if 
(!pal::create_directory(path.c_str(), 0700)) - { - if (pal::directory_exists(path)) - { - // The directory was created since we last checked. - return; - } - - trace::error(_X("Failed to create directory [%s]"), path.c_str()); - throw StatusCode::BundleExtractionIOError; - } + manifest_footer_t* footer = manifest_footer_t::read(m_bundle_stream); + header_offset = footer->manifest_header_offset(); } -void bdl_processor_t::determine_extraction_dir() +void bundle_runner_t::process_manifest_header(int64_t header_offset) { - if (!pal::getenv(_X("DOTNET_BUNDLE_EXTRACT_BASE_DIR"), &m_extraction_dir)) - { - if (!pal::get_temp_directory(m_extraction_dir)) - { - trace::error(_X("Failed to get temp_dir")); - throw StatusCode::BundleExtractionFailure; - } - - append_path(&m_extraction_dir, _X(".net")); - } + seek(header_offset, SEEK_SET); - pal::string_t host_name = strip_executable_ext(get_filename(m_bundle_path)); - append_path(&m_extraction_dir, host_name.c_str()); - append_path(&m_extraction_dir, m_bundle_id.c_str()); + manifest_header_t* header = manifest_header_t::read(m_bundle_stream); - trace::info(_X("Extraction Location [%s]"), m_extraction_dir.c_str()); + m_num_embedded_files = header->num_embedded_files(); + m_bundle_id = header->bundle_id(); } -void bdl_processor_t::create_working_extraction_dir() +// Compute the final extraction location as: +// m_extraction_dir = $DOTNET_BUNDLE_EXTRACT_BASE_DIR///... 
+// +// If DOTNET_BUNDLE_EXTRACT_BASE_DIR is not set in the environment, the +// base directory defaults to $TMPDIR/.net +void bundle_runner_t::determine_extraction_dir() { - // Set the working extraction path - m_working_extraction_dir = get_directory(m_extraction_dir); - pal::char_t pid[32]; - pal::snwprintf(pid, 32, _X("%x"), pal::get_pid()); - append_path(&m_working_extraction_dir, pid); - - create_directory_tree(m_working_extraction_dir); - - trace::info(_X("Temporary Extraction Location [%s]"), m_working_extraction_dir.c_str()); + if (!pal::getenv(_X("DOTNET_BUNDLE_EXTRACT_BASE_DIR"), &m_extraction_dir)) + { + if (!pal::get_temp_directory(m_extraction_dir)) + { + trace::error(_X("Failure processing application bundle.")); + trace::error(_X("Failed to determine location for extracting embedded files")); + throw StatusCode::BundleExtractionFailure; + } + + append_path(&m_extraction_dir, _X(".net")); + } + + pal::string_t host_name = strip_executable_ext(get_filename(m_bundle_path)); + append_path(&m_extraction_dir, host_name.c_str()); + append_path(&m_extraction_dir, m_bundle_id.c_str()); + + trace::info(_X("Files embedded within the bundled will be extracted to [%s] directory"), m_extraction_dir.c_str()); } -FILE* bdl_processor_t::create_extraction_file(const pal::string_t& relative_path) +// Compute the worker extraction location for this process, before the +// extracted files are committed to the final location +// m_working_extraction_dir = $DOTNET_BUNDLE_EXTRACT_BASE_DIR// +void bundle_runner_t::create_working_extraction_dir() { - pal::string_t file_path = m_working_extraction_dir; - append_path(&file_path, relative_path.c_str()); + // Set the working extraction path + m_working_extraction_dir = get_directory(m_extraction_dir); + pal::char_t pid[32]; + pal::snwprintf(pid, 32, _X("%x"), pal::get_pid()); + append_path(&m_working_extraction_dir, pid); - // m_working_extraction_dir is assumed to exist, - // so we only create sub-directories if relative_path 
contains directories - if (has_dirs_in_path(relative_path)) - { - create_directory_tree(get_directory(file_path)); - } + create_directory_tree(m_working_extraction_dir); - FILE* file = pal::file_open(file_path.c_str(), _X("wb")); - - if (file == nullptr) - { - trace::error(_X("Failed to open file [%s] for writing"), file_path.c_str()); - throw StatusCode::BundleExtractionIOError; - } - - return file; + trace::info(_X("Temporary directory used to extract bundled files is [%s]"), m_working_extraction_dir.c_str()); } -void bdl_processor_t::extract_file(file_entry_t *entry) +// Create a file to be extracted out on disk, including any intermediate sub-directories. +FILE* bundle_runner_t::create_extraction_file(const pal::string_t& relative_path) { - FILE* file = create_extraction_file(entry->relative_path); - uint8_t* buffer = new uint8_t[entry->data.size]; - - seek(entry->data.offset, SEEK_SET); - read(buffer, entry->data.size, m_bundle); - write(buffer, entry->data.size, file); - - fclose(file); + pal::string_t file_path = m_working_extraction_dir; + append_path(&file_path, relative_path.c_str()); + + // m_working_extraction_dir is assumed to exist, + // so we only create sub-directories if relative_path contains directories + if (has_dirs_in_path(relative_path)) + { + create_directory_tree(get_directory(file_path)); + } + + FILE* file = pal::file_open(file_path.c_str(), _X("wb")); + + if (file == nullptr) + { + trace::error(_X("Failure processing application bundle.")); + trace::error(_X("Failed to open file [%s] for writing"), file_path.c_str()); + throw StatusCode::BundleExtractionIOError; + } + + return file; } -bool bdl_processor_t::can_reuse_extraction() +// Extract one file from the bundle to disk. 
+void bundle_runner_t::extract_file(file_entry_t *entry) { - if (!pal::directory_exists(m_extraction_dir)) - { - return false; - } - - // TBD: Additional checks to determine validity - // TBD: What to do if the validity fails - // ex: delete the extraction ditectory and proceed with extraction. - - return true; + FILE* file = create_extraction_file(entry->relative_path); + const size_t buffer_size = 8 * 1024; // Copy the file in 8KB chunks + uint8_t buffer[buffer_size]; + int64_t file_size = entry->data.size; + + seek(entry->data.offset, SEEK_SET); + do { + int64_t copy_size = (file_size <= buffer_size) ? file_size : buffer_size; + read(buffer, copy_size, m_bundle_stream); + write(buffer, copy_size, file); + file_size -= copy_size; + } while (file_size > 0); + + fclose(file); } -StatusCode bdl_processor_t::extract() +bool bundle_runner_t::can_reuse_extraction() { - try - { - reopen_host_for_reading(); - - int64_t manifest_header_offset; - process_manifest_footer(manifest_header_offset); - process_manifest_header(manifest_header_offset); - - determine_extraction_dir(); - if (can_reuse_extraction()) - { - return StatusCode::Success; - } + // In this version, the extracted files are assumed to be + // correct by construction. + // + // Files embedded in the bundle are first extracted to m_working_extraction_dir + // Once all files are successfully extracted, the extraction location is + // committed (renamed) to m_extraction_dir. Therefore, the presence of + // m_extraction_dir means that the files are pre-extracted. 
- create_working_extraction_dir(); - m_manifest = manifest_t::read(m_bundle, m_num_embedded_files); - - for (file_entry_t* entry : m_manifest->files) { - extract_file(entry); - } - - if (pal::rename(m_working_extraction_dir.c_str(), m_extraction_dir.c_str()) != 0) - { - if (can_reuse_extraction()) - { - // Another process successfully extracted the dependencies - - trace::info(_X("Extraction completed by another process")); - - // pal::delete_directory(m_working_extraction_dir); - return StatusCode::Success; - } - - trace::error(_X("Failed to switch extraction directory to [%s]"), m_extraction_dir.c_str()); - throw StatusCode::BundleExtractionFailure; - } - - fclose(m_bundle); - return StatusCode::Success; - } - catch (StatusCode e) - { - fclose(m_bundle); - return e; - } + return pal::directory_exists(m_extraction_dir); } -#endif // FEATURE_APPHOST +// Current support for executing single-file bundles involves +// extraction of embedded files to actual files on disk. +// This method implements the file extraction functionality at startup. +StatusCode bundle_runner_t::extract() +{ + try + { + // Determine if the current executable is a bundle + reopen_host_for_reading(); + + // If the current AppHost is a bundle, it's layout will be + // AppHost binary + // Embedded Files: including the app, its configuration files, + // dependencies, and possibly the runtime. + // Bundle Manifest + + int64_t manifest_header_offset; + process_manifest_footer(manifest_header_offset); + process_manifest_header(manifest_header_offset); + + // Determine if embedded files are already extracted, and available for reuse + determine_extraction_dir(); + if (can_reuse_extraction()) + { + return StatusCode::Success; + } + + // Extract files to temporary working directory + // + // Files are extracted to a specific deterministic location on disk + // on first run, and are available for reuse by subsequent similar runs. 
+ // + // The extraction should be fault tolerant with respect to: + // * Failures/crashes during extraction which result in partial-extraction + // * Race between two or more processes concurrently attempting extraction + // + // In order to solve these issues, we implement a extraction as a two-phase approach: + // 1) Files embedded in a bundle are extracted to a process-specific temporary + // extraction location (m_working_extraction_dir) + // 2) Upon successful extraction, m_working_extraction_dir is renamed to the actual + // extraction location (m_extraction_dir) + // + // This effectively creates a file-lock to protect against races and failed extractions. + + create_working_extraction_dir(); + + m_manifest = manifest_t::read(m_bundle_stream, m_num_embedded_files); + + for (file_entry_t* entry : m_manifest->files) { + extract_file(entry); + } + + // Commit files to the final extraction directory + if (pal::rename(m_working_extraction_dir.c_str(), m_extraction_dir.c_str()) != 0) + { + if (can_reuse_extraction()) + { + // Another process successfully extracted the dependencies + + trace::info(_X("Extraction completed by another process, aborting current extracion.")); + + remove_directory_tree(m_working_extraction_dir); + return StatusCode::Success; + } + + trace::error(_X("Failure processing application bundle.")); + trace::error(_X("Failed to commit extracted to files to directory [%s]"), m_extraction_dir.c_str()); + throw StatusCode::BundleExtractionFailure; + } + + fclose(m_bundle_stream); + return StatusCode::Success; + } + catch (StatusCode e) + { + fclose(m_bundle_stream); + return e; + } +} diff --git a/src/corehost/cli/bdl_processor.h b/src/corehost/cli/bdl_processor.h index b494bf971d..1c8a0cae34 100644 --- a/src/corehost/cli/bdl_processor.h +++ b/src/corehost/cli/bdl_processor.h @@ -5,63 +5,58 @@ #ifndef __BDL_PROCESSOR_H__ #define __BDL_PROCESSOR_H__ -#if FEATURE_APPHOST #include #include "bdl_manifest.h" #include "error_codes.h" -// If the 
current AppHost is a bundle, it's layout will be -// AppHost binary -// Embedded Files: including the app, its configuration files, -// dependencies, and possibly the runtime. -// Bundle Manifest - -class bdl_processor_t +namespace bundle { -public: - bdl_processor_t (const pal::string_t &bundle_path) - :m_bundle_path(bundle_path), - m_bundle(nullptr), - m_manifest(nullptr), - m_num_embedded_files(0) + class bundle_runner_t { - } - - pal::string_t get_extraction_dir() - { - return m_extraction_dir; - } - - StatusCode extract(); - - static void read(void* buf, size_t size, FILE* stream); - static void write(const void* buf, size_t size, FILE* stream); - static void read_string(pal::string_t& str, size_t size, FILE* stream); - -private: - void reopen_host_for_reading(); - void seek(long offset, int origin); - - void process_manifest_footer(int64_t& header_offset); - void process_manifest_header(int64_t header_offset); - - void determine_extraction_dir(); - void create_working_extraction_dir(); - bool can_reuse_extraction(); - - FILE* create_extraction_file(const pal::string_t& relative_path); - void extract_file(file_entry_t* entry); - - FILE* m_bundle; - manifest_t* m_manifest; - int32_t m_num_embedded_files; - pal::string_t m_bundle_path; - pal::string_t m_bundle_id; - pal::string_t m_extraction_dir; - pal::string_t m_working_extraction_dir; -}; - -#endif // FEATURE_APPHOST + public: + bundle_runner_t(const pal::string_t& bundle_path) + :m_bundle_path(bundle_path), + m_bundle_stream(nullptr), + m_manifest(nullptr), + m_num_embedded_files(0) + { + } + + pal::string_t get_extraction_dir() + { + return m_extraction_dir; + } + + StatusCode extract(); + + static void read(void* buf, size_t size, FILE* stream); + static void write(const void* buf, size_t size, FILE* stream); + static void read_string(pal::string_t& str, size_t size, FILE* stream); + + private: + void reopen_host_for_reading(); + void seek(long offset, int origin); + + void 
process_manifest_footer(int64_t& header_offset); + void process_manifest_header(int64_t header_offset); + + void determine_extraction_dir(); + void create_working_extraction_dir(); + bool can_reuse_extraction(); + + FILE* create_extraction_file(const pal::string_t& relative_path); + void extract_file(file_entry_t* entry); + + FILE* m_bundle_stream; + manifest_t* m_manifest; + int32_t m_num_embedded_files; + pal::string_t m_bundle_path; + pal::string_t m_bundle_id; + pal::string_t m_extraction_dir; + pal::string_t m_working_extraction_dir; + }; + +} #endif // __BDL_PROCESSOR_H__ diff --git a/src/corehost/common/pal.h b/src/corehost/common/pal.h index 3ab874d03d..20a225da36 100644 --- a/src/corehost/common/pal.h +++ b/src/corehost/common/pal.h @@ -138,8 +138,12 @@ namespace pal bool pal_clrstring(const pal::string_t& str, std::vector<char>* out); bool clr_palstring(const char* cstr, pal::string_t* out); - inline bool create_directory(const pal::char_t* dir, int mode) { return CreateDirectoryW(dir, NULL) != 0; } - inline int rename(const pal::char_t* old_name, const pal::char_t* new_name) { return ::_wrename(old_name, new_name); } + inline bool mkdir(const pal::char_t* dir, int mode) { return CreateDirectoryW(dir, NULL) != 0; } + inline bool rmdir(const pal::char_t* path) { return RemoveDirectoryW(path) != 0; } + inline int rename(const pal::char_t* old_name, const pal::char_t* new_name) { return ::_wrename(old_name, new_name); } + inline int remove(const pal::char_t* path) { return ::_wremove(path); } + inline int get_pid() { return GetCurrentProcessId(); } /* inline: this header is included by several translation units */ + #else #ifdef EXPORT_SHARED_API #define SHARED_API extern "C" __attribute__((__visibility__("default"))) @@ -188,18 +192,22 @@ namespace pal inline bool pal_clrstring(const pal::string_t& str, std::vector<char>* out) { return pal_utf8string(str, out); } inline bool clr_palstring(const char* cstr, pal::string_t* out) { out->assign(cstr); return true; } - inline bool create_directory(const pal::char_t* dir, int mode) { return 
mkdir(dir, mode) == 0; } - inline int rename(const pal::char_t* old_name, const pal::char_t* new_name) { return ::rename(old_name, new_name); } + inline bool mkdir(const pal::char_t* dir, int mode) { return ::mkdir(dir, mode) == 0; } /* '::' is required: an unqualified call resolves to pal::mkdir itself and recurses forever */ + inline bool rmdir(const pal::char_t* path) { return ::rmdir(path) == 0; } /* '::' is required for the same reason as mkdir */ + inline int rename(const pal::char_t* old_name, const pal::char_t* new_name) { return ::rename(old_name, new_name); } + inline int remove(const pal::char_t* path) { return ::remove(path); } + inline int get_pid() { return getpid(); } /* inline: this header is included by several translation units */ + #endif - inline int snwprintf(char_t* buffer, size_t count, const char_t* format, ...) - { - va_list args; - va_start(args, format); - int ret = str_vprintf(buffer, count, format, args); - va_end(args); - return ret; - } + inline int snwprintf(char_t* buffer, size_t count, const char_t* format, ...) + { + va_list args; + va_start(args, format); + int ret = str_vprintf(buffer, count, format, args); + va_end(args); + return ret; + } pal::string_t to_string(int value); pal::string_t get_timestamp(); @@ -245,8 +253,7 @@ namespace pal bool get_default_breadcrumb_store(string_t* recv); bool is_path_rooted(const string_t& path); - bool get_temp_directory(pal::string_t& tmp_dir); - int get_pid(); + bool get_temp_directory(pal::string_t& tmp_dir); int xtoi(const char_t* input); diff --git a/src/corehost/common/pal.unix.cpp b/src/corehost/common/pal.unix.cpp index db3e3e1e4f..52b7d673b8 100644 --- a/src/corehost/common/pal.unix.cpp +++ b/src/corehost/common/pal.unix.cpp @@ -106,11 +106,6 @@ void pal::unload_library(dll_t library) } } -int pal::get_pid() -{ - return getpid(); -} - int pal::xtoi(const char_t* input) { return atoi(input); @@ -188,8 +183,8 @@ bool pal::get_default_servicing_directory(string_t* recv) bool pal::get_temp_directory(pal::string_t& tmp_dir) { - pal::getenv(_X("TMPDIR"), &tmp_dir); - return pal::realpath(&tmp_dir); + pal::getenv(_X("TMPDIR"), &tmp_dir); + return pal::realpath(&tmp_dir); } static diff --git 
a/src/corehost/common/pal.windows.cpp b/src/corehost/common/pal.windows.cpp index c39d272b99..34e9387aa0 100644 --- a/src/corehost/common/pal.windows.cpp +++ b/src/corehost/common/pal.windows.cpp @@ -423,11 +423,6 @@ bool pal::getenv(const char_t* name, string_t* recv) return true; } -int pal::get_pid() -{ - return GetCurrentProcessId(); -} - int pal::xtoi(const char_t* input) { return ::_wtoi(input); @@ -464,8 +459,10 @@ bool pal::get_module_path(dll_t mod, string_t* recv) bool pal::get_temp_directory(pal::string_t& tmp_dir) { - pal::getenv(_X("TEMP"), &tmp_dir); - return pal::realpath(&tmp_dir); + pal::char_t temp_path[MAX_PATH + 1]; + const DWORD len = GetTempPathW(MAX_PATH + 1, temp_path); /* 0 on failure; larger than the buffer when the path does not fit */ + tmp_dir.assign(temp_path, (len <= MAX_PATH) ? len : 0); /* only read the characters the call actually wrote */ + return !tmp_dir.empty() && pal::realpath(&tmp_dir); /* report failure instead of resolving an empty path */ } static bool wchar_convert_helper(DWORD code_page, const char* cstr, int len, pal::string_t* out) diff --git a/src/corehost/corehost.cpp b/src/corehost/corehost.cpp index 004b79e464..e3dd011727 100644 --- a/src/corehost/corehost.cpp +++ b/src/corehost/corehost.cpp @@ -35,14 +35,6 @@ #define EMBED_HASH_LO_PART_UTF8 "74e592c2fa383d4a3960714caef0c4f2" #define EMBED_HASH_FULL_UTF8 (EMBED_HASH_HI_PART_UTF8 EMBED_HASH_LO_PART_UTF8) // NUL terminated -bool is_exe_enabled_for_execution_hack(pal::string_t* app_dll) -{ - pal::string_t host_path; - pal::get_own_executable_path(&host_path); - *app_dll = strip_executable_ext(get_filename(host_path)).append(_X(".dll")); - return true; -} - bool is_exe_enabled_for_execution(pal::string_t* app_dll) { constexpr int EMBED_SZ = sizeof(EMBED_HASH_FULL_UTF8) / sizeof(EMBED_HASH_FULL_UTF8[0]); @@ -102,7 +94,7 @@ int exe_start(const int argc, const pal::char_t* argv[]) #if FEATURE_APPHOST pal::string_t embedded_app_name; - if (!is_exe_enabled_for_execution_hack(&embedded_app_name)) + if (!is_exe_enabled_for_execution(&embedded_app_name)) { trace::error(_X("A fatal error was encountered. 
This executable was not bound to load a managed DLL.")); return StatusCode::AppHostExeNotBoundFailure; @@ -119,7 +111,7 @@ int exe_start(const int argc, const pal::char_t* argv[]) requires_v2_hostfxr_interface = true; } - bdl_processor_t extractor(host_path); + bundle::bundle_runner_t extractor(host_path); StatusCode bundle_status = extractor.extract(); switch (bundle_status) diff --git a/src/managed/Microsoft.NET.Build.Bundle/Extractor.cs b/src/managed/Microsoft.NET.Build.Bundle/Extractor.cs index 5b41f88566..cd831b7ea8 100644 --- a/src/managed/Microsoft.NET.Build.Bundle/Extractor.cs +++ b/src/managed/Microsoft.NET.Build.Bundle/Extractor.cs @@ -54,7 +54,7 @@ public void Spill() long size = entry.Size; do { - int copySize = (int)(size % int.MaxValue); + int copySize = (int)(size <= int.MaxValue ? size : int.MaxValue); file.Write(reader.ReadBytes(copySize)); size -= copySize; } while (size > 0);