-
Notifications
You must be signed in to change notification settings - Fork 726
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
102 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <[email protected]> | ||
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <[email protected]> | ||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) | ||
|
||
#include "string_util.h" | ||
|
@@ -9,6 +9,12 @@ | |
#include <cstdio> | ||
#include <sstream> | ||
|
||
#ifndef __APPLE__ | ||
#include <malloc.h> // alloca | ||
#else | ||
#include <alloca.h> | ||
#endif | ||
|
||
#ifdef _WIN32 | ||
#include "windows_headers.h" | ||
#endif | ||
|
@@ -427,6 +433,96 @@ void StringUtil::EllipsiseInPlace(std::string& str, u32 max_length, const char* | |
} | ||
} | ||
|
||
std::optional<size_t> StringUtil::BytePatternSearch(const std::span<const u8> bytes, const std::string_view pattern) | ||
{ | ||
// Parse the pattern into a bytemask. | ||
size_t pattern_length = 0; | ||
bool hinibble = true; | ||
for (size_t i = 0; i < pattern.size(); i++) | ||
{ | ||
if ((pattern[i] >= '0' && pattern[i] <= '9') || (pattern[i] >= 'a' && pattern[i] <= 'f') || | ||
(pattern[i] >= 'A' && pattern[i] <= 'F') || pattern[i] == '?') | ||
{ | ||
hinibble ^= true; | ||
if (hinibble) | ||
pattern_length++; | ||
} | ||
else if (pattern[i] == ' ' || pattern[i] == '\r' || pattern[i] == '\n') | ||
{ | ||
continue; | ||
} | ||
else | ||
{ | ||
break; | ||
} | ||
} | ||
if (pattern_length == 0) | ||
return std::nullopt; | ||
|
||
const bool allocate_on_heap = (pattern_length >= 512); | ||
u8* match_bytes = allocate_on_heap ? static_cast<u8*>(alloca(pattern_length * 2)) : new u8[pattern_length * 2]; | ||
u8* match_masks = match_bytes + pattern_length; | ||
|
||
hinibble = true; | ||
u8 match_byte = 0; | ||
u8 match_mask = 0; | ||
for (size_t i = 0, match_len = 0; i < pattern.size(); i++) | ||
{ | ||
u8 nibble = 0, nibble_mask = 0xF; | ||
if (pattern[i] >= '0' && pattern[i] <= '9') | ||
nibble = pattern[i] - '0'; | ||
else if (pattern[i] >= 'a' && pattern[i] <= 'f') | ||
nibble = pattern[i] - 'a' + 0xa; | ||
else if (pattern[i] >= 'A' && pattern[i] <= 'F') | ||
nibble = pattern[i] - 'A' + 0xa; | ||
else if (pattern[i] == '?') | ||
nibble_mask = 0; | ||
else if (pattern[i] == ' ' || pattern[i] == '\r' || pattern[i] == '\n') | ||
continue; | ||
else | ||
break; | ||
|
||
hinibble ^= true; | ||
if (hinibble) | ||
{ | ||
match_bytes[match_len] = nibble | (match_byte << 4); | ||
match_masks[match_len] = nibble_mask | (match_mask << 4); | ||
match_len++; | ||
} | ||
else | ||
{ | ||
match_byte = nibble; | ||
match_mask = nibble_mask; | ||
} | ||
} | ||
if (pattern_length == 0) | ||
return std::nullopt; | ||
|
||
std::optional<size_t> ret; | ||
const size_t max_search_offset = bytes.size() - pattern_length; | ||
for (size_t offset = 0; offset < max_search_offset; offset++) | ||
{ | ||
const u8* start = bytes.data() + offset; | ||
for (size_t match_offset = 0;;) | ||
{ | ||
if ((start[match_offset] & match_masks[match_offset]) != match_bytes[match_offset]) | ||
break; | ||
|
||
match_offset++; | ||
if (match_offset == pattern_length) | ||
{ | ||
// found it! | ||
ret = offset; | ||
} | ||
} | ||
} | ||
|
||
if (allocate_on_heap) | ||
delete[] match_bytes; | ||
|
||
return ret; | ||
} | ||
|
||
size_t StringUtil::DecodeUTF8(const std::string_view str, size_t offset, char32_t* ch) | ||
{ | ||
return DecodeUTF8(str.data() + offset, str.length() - offset, ch); | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <[email protected]> | ||
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <[email protected]> | ||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) | ||
|
||
#pragma once | ||
|
@@ -8,6 +8,7 @@ | |
#include <cstring> | ||
#include <iomanip> | ||
#include <optional> | ||
#include <span> | ||
#include <string> | ||
#include <string_view> | ||
#include <vector> | ||
|
@@ -275,6 +276,9 @@ size_t DecodeUTF8(const std::string& str, size_t offset, char32_t* ch); | |
std::string Ellipsise(const std::string_view str, u32 max_length, const char* ellipsis = "..."); | ||
void EllipsiseInPlace(std::string& str, u32 max_length, const char* ellipsis = "..."); | ||
|
||
/// Searches for the specified byte pattern in the given memory span. Wildcards (i.e. ??) are supported. | ||
std::optional<size_t> BytePatternSearch(const std::span<const u8> bytes, const std::string_view pattern); | ||
|
||
/// Strided memcpy/memcmp. | ||
ALWAYS_INLINE static void StrideMemCpy(void* dst, std::size_t dst_stride, const void* src, std::size_t src_stride, | ||
std::size_t copy_size, std::size_t count) | ||
|