Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

security: Add "marker" mode for log desensitization #9136

Merged
merged 13 commits into from
Jun 18, 2024
2 changes: 1 addition & 1 deletion contrib/client-c
100 changes: 82 additions & 18 deletions dbms/src/Common/FieldVisitors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,57 +221,121 @@ String FieldVisitorToString::operator()(const Tuple & x_def) const

String FieldVisitorToDebugString::operator()(const Null &) const
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use a template to reduce the duplicated code?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The UInt64/Int64/DecimalField<...> overloads share similar code, but the other types do not.
We could try to eliminate the duplication using std::enable_if, but that is not related to this PR's change; maybe we can do it in another PR.

{
    // Render a NULL field for debug logs according to the active redaction mode:
    //   Enable  -> "?" placeholder, nothing about the value is revealed
    //   Disable -> the plain text "NULL"
    //   Marker  -> "NULL" wrapped in the ‹ › redaction markers
    switch (Redact::REDACT_LOG.load(std::memory_order_relaxed))
    {
    case RedactMode::Enable:
        return "?";
    case RedactMode::Disable:
        return "NULL";
    case RedactMode::Marker:
        // The literal contains no marker characters, so escaping is skipped.
        return Redact::toMarkerString("NULL", /*ignore_escape*/ true);
    }
    // Not reachable for a valid RedactMode. Fail closed (redact) instead of
    // flowing off the end of a non-void function, which is undefined behavior.
    return "?";
}
/// Renders an unsigned integer field for debug logs, honoring the redaction mode:
/// "?" when redaction is enabled, the quoted value when disabled, and the quoted
/// value wrapped in ‹ › markers in marker mode.
String FieldVisitorToDebugString::operator()(const UInt64 & x) const
{
    switch (Redact::REDACT_LOG.load(std::memory_order_relaxed))
    {
    case RedactMode::Enable:
        return "?";
    case RedactMode::Disable:
        return formatQuoted(x);
    case RedactMode::Marker:
        // Numeric text is assumed to contain no marker characters, so escaping is skipped.
        return Redact::toMarkerString(formatQuoted(x), /*ignore_escape*/ true);
    }
    // Not reachable for a valid RedactMode. Fail closed (redact) instead of
    // flowing off the end of a non-void function, which is undefined behavior.
    return "?";
}
/// Renders a signed integer field for debug logs, honoring the redaction mode:
/// "?" when redaction is enabled, the quoted value when disabled, and the quoted
/// value wrapped in ‹ › markers in marker mode.
String FieldVisitorToDebugString::operator()(const Int64 & x) const
{
    switch (Redact::REDACT_LOG.load(std::memory_order_relaxed))
    {
    case RedactMode::Enable:
        return "?";
    case RedactMode::Disable:
        return formatQuoted(x);
    case RedactMode::Marker:
        // Numeric text is assumed to contain no marker characters, so escaping is skipped.
        return Redact::toMarkerString(formatQuoted(x), /*ignore_escape*/ true);
    }
    // Not reachable for a valid RedactMode. Fail closed (redact) instead of
    // flowing off the end of a non-void function, which is undefined behavior.
    return "?";
}
/// Renders a floating-point field for debug logs, honoring the redaction mode:
/// "?" when redaction is enabled, the formatted value when disabled, and the
/// formatted value wrapped in ‹ › markers in marker mode.
String FieldVisitorToDebugString::operator()(const Float64 & x) const
{
    switch (Redact::REDACT_LOG.load(std::memory_order_relaxed))
    {
    case RedactMode::Enable:
        return "?";
    case RedactMode::Disable:
        return formatFloat(x);
    case RedactMode::Marker:
        // Numeric text is assumed to contain no marker characters, so escaping is skipped.
        return Redact::toMarkerString(formatFloat(x), /*ignore_escape*/ true);
    }
    // Not reachable for a valid RedactMode. Fail closed (redact) instead of
    // flowing off the end of a non-void function, which is undefined behavior.
    return "?";
}
/// Renders a string field for debug logs, honoring the redaction mode:
/// "?" when redaction is enabled, the quoted value when disabled, and the quoted
/// value wrapped in ‹ › markers (with escaping) in marker mode.
String FieldVisitorToDebugString::operator()(const String & x) const
{
    switch (Redact::REDACT_LOG.load(std::memory_order_relaxed))
    {
    case RedactMode::Enable:
        return "?";
    case RedactMode::Disable:
        return formatQuoted(x);
    case RedactMode::Marker:
        // User strings may themselves contain the UTF-8 marker characters, so
        // they have to go through the escaping path.
        return Redact::toMarkerString(formatQuoted(x), /*ignore_escape*/ false);
    }
    // Not reachable for a valid RedactMode. Fail closed (redact) instead of
    // flowing off the end of a non-void function, which is undefined behavior.
    return "?";
}
/// Renders a Decimal32 field for debug logs, honoring the redaction mode:
/// "?" when redaction is enabled, the quoted value when disabled, and the quoted
/// value wrapped in ‹ › markers in marker mode.
String FieldVisitorToDebugString::operator()(const DecimalField<Decimal32> & x) const
{
    switch (Redact::REDACT_LOG.load(std::memory_order_relaxed))
    {
    case RedactMode::Enable:
        return "?";
    case RedactMode::Disable:
        return formatQuoted(x);
    case RedactMode::Marker:
        // Numeric text is assumed to contain no marker characters, so escaping is skipped.
        return Redact::toMarkerString(formatQuoted(x), /*ignore_escape*/ true);
    }
    // Not reachable for a valid RedactMode. Fail closed (redact) instead of
    // flowing off the end of a non-void function, which is undefined behavior.
    return "?";
}
/// Renders a Decimal64 field for debug logs, honoring the redaction mode:
/// "?" when redaction is enabled, the quoted value when disabled, and the quoted
/// value wrapped in ‹ › markers in marker mode.
String FieldVisitorToDebugString::operator()(const DecimalField<Decimal64> & x) const
{
    switch (Redact::REDACT_LOG.load(std::memory_order_relaxed))
    {
    case RedactMode::Enable:
        return "?";
    case RedactMode::Disable:
        return formatQuoted(x);
    case RedactMode::Marker:
        // Numeric text is assumed to contain no marker characters, so escaping is skipped.
        return Redact::toMarkerString(formatQuoted(x), /*ignore_escape*/ true);
    }
    // Not reachable for a valid RedactMode. Fail closed (redact) instead of
    // flowing off the end of a non-void function, which is undefined behavior.
    return "?";
}
/// Renders a Decimal128 field for debug logs, honoring the redaction mode:
/// "?" when redaction is enabled, the quoted value when disabled, and the quoted
/// value wrapped in ‹ › markers in marker mode.
String FieldVisitorToDebugString::operator()(const DecimalField<Decimal128> & x) const
{
    switch (Redact::REDACT_LOG.load(std::memory_order_relaxed))
    {
    case RedactMode::Enable:
        return "?";
    case RedactMode::Disable:
        return formatQuoted(x);
    case RedactMode::Marker:
        // Numeric text is assumed to contain no marker characters, so escaping is skipped.
        return Redact::toMarkerString(formatQuoted(x), /*ignore_escape*/ true);
    }
    // Not reachable for a valid RedactMode. Fail closed (redact) instead of
    // flowing off the end of a non-void function, which is undefined behavior.
    return "?";
}
/// Renders a Decimal256 field for debug logs, honoring the redaction mode:
/// "?" when redaction is enabled, the quoted value when disabled, and the quoted
/// value wrapped in ‹ › markers in marker mode.
String FieldVisitorToDebugString::operator()(const DecimalField<Decimal256> & x) const
{
    switch (Redact::REDACT_LOG.load(std::memory_order_relaxed))
    {
    case RedactMode::Enable:
        return "?";
    case RedactMode::Disable:
        return formatQuoted(x);
    case RedactMode::Marker:
        // Numeric text is assumed to contain no marker characters, so escaping is skipped.
        return Redact::toMarkerString(formatQuoted(x), /*ignore_escape*/ true);
    }
    // Not reachable for a valid RedactMode. Fail closed (redact) instead of
    // flowing off the end of a non-void function, which is undefined behavior.
    return "?";
}

String FieldVisitorToDebugString::operator()(const Array & x) const
Expand Down
144 changes: 124 additions & 20 deletions dbms/src/Common/RedactHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,118 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Common/FmtUtils.h>
#include <Common/RedactHelpers.h>
#include <Common/hex.h>
#include <IO/WriteHelpers.h>
#include <common/types.h>
#include <pingcap/RedactHelpers.h>

#include <iomanip>
#include <string>

std::atomic<bool> Redact::REDACT_LOG = false;
std::atomic<RedactMode> Redact::REDACT_LOG = RedactMode::Disable;

void Redact::setRedactLog(bool v)
/// Sets the process-wide log redaction mode.
///
/// The mode is first mirrored to the client-c library (which has its own
/// `pingcap::RedactMode` enum) and then stored in `Redact::REDACT_LOG`.
void Redact::setRedactLog(RedactMode v)
{
    // Set the redact flag for client-c. Each case must end with `break`:
    // without it, execution falls through and the last case (Marker) wins
    // regardless of the requested mode.
    switch (v)
    {
    case RedactMode::Enable:
        pingcap::Redact::setRedactLog(pingcap::RedactMode::Enable);
        break;
    case RedactMode::Disable:
        pingcap::Redact::setRedactLog(pingcap::RedactMode::Disable);
        break;
    case RedactMode::Marker:
        pingcap::Redact::setRedactLog(pingcap::RedactMode::Marker);
        break;
    }
    Redact::REDACT_LOG.store(v, std::memory_order_relaxed);
}

std::string Redact::toMarkerString(const std::string & raw, bool ignore_escape)
{
// A shortcut for those caller ensure the `raw` must not contain any char that
// need to be escaped.
if (likely(ignore_escape))
return fmt::format("‹{}›", raw);

constexpr static size_t BEGIN_SIZE = std::string_view("‹").size();
constexpr static size_t END_SIZE = std::string_view("›").size();
enum class EscapeMark
{
Begin,
End,
};
// must be an ordered map, <marker_position, marker_type>
std::map<size_t, EscapeMark> found_pos;
std::string::size_type pos = 0;
do
{
pos = raw.find("‹", pos);
if (pos == std::string::npos)
break;
found_pos.emplace(pos, EscapeMark::Begin);
pos += BEGIN_SIZE;
} while (pos != std::string::npos && pos < raw.size());
pos = 0;
do
{
pos = raw.find("›", pos);
if (pos == std::string::npos)
break;
found_pos.emplace(pos, EscapeMark::End);
pos += END_SIZE;
} while (pos != std::string::npos && pos < raw.size());
if (likely(found_pos.empty()))
{
// A shortcut for detecting that nothing to be escaped.
return fmt::format("‹{}›", raw);
}

// Escape the chars in `raw` to `fmt_buf`
DB::FmtBuffer fmt_buf;
fmt_buf.append("‹");
pos = 0; // the copy pos from `raw`
for (const auto & [to_escape_pos, to_escape_type] : found_pos)
{
switch (to_escape_type)
{
case EscapeMark::Begin:
{
fmt_buf.append(std::string_view(raw.c_str() + pos, to_escape_pos - pos + BEGIN_SIZE));
fmt_buf.append("‹"); // append for escape
pos = to_escape_pos + BEGIN_SIZE; // move the copy begin pos from `raw`
break;
}
case EscapeMark::End:
{
fmt_buf.append(std::string_view(raw.c_str() + pos, to_escape_pos - pos + END_SIZE));
fmt_buf.append("›"); // append for escape
pos = to_escape_pos + END_SIZE; // move the copy begin pos from `raw`
break;
}
}
}
// handle the suffix
if (pos < raw.size())
fmt_buf.append(std::string_view(raw.c_str() + pos, raw.size() - pos));
fmt_buf.append("›");
return fmt_buf.toString();
}

/// Renders an integer handle for debug logs, honoring the redaction mode:
/// "?" when redaction is enabled, the decimal text when disabled, and the
/// decimal text wrapped in ‹ › markers in marker mode.
std::string Redact::handleToDebugString(int64_t handle)
{
    switch (Redact::REDACT_LOG.load(std::memory_order_relaxed))
    {
    case RedactMode::Enable:
        return "?";
    case RedactMode::Disable:
        // Encode as string
        return DB::toString(handle);
    case RedactMode::Marker:
        // Note: `handle` is an int64, so its text form needs no escaping.
        return toMarkerString(DB::toString(handle), /*ignore_escape*/ true);
    }
    // Not reachable for a valid RedactMode. Fail closed (redact) instead of
    // flowing off the end of a non-void function, which is undefined behavior.
    return "?";
}

std::string Redact::keyToHexString(const char * key, size_t size)
Expand All @@ -52,29 +141,44 @@ std::string Redact::keyToHexString(const char * key, size_t size)

/// Renders a raw key (`size` bytes starting at `key`) for debug logs, honoring
/// the redaction mode: "?" when redaction is enabled, the hex encoding when
/// disabled, and the hex encoding wrapped in ‹ › markers in marker mode.
std::string Redact::keyToDebugString(const char * key, const size_t size)
{
    switch (Redact::REDACT_LOG.load(std::memory_order_relaxed))
    {
    case RedactMode::Enable:
        return "?";
    case RedactMode::Disable:
        // Encode as hex string
        return Redact::keyToHexString(key, size);
    case RedactMode::Marker:
        // Note: the encoded key is a hexadecimal string, so it needs no escaping.
        return toMarkerString(Redact::keyToHexString(key, size), /*ignore_escape*/ true);
    }
    // Not reachable for a valid RedactMode. Fail closed (redact) instead of
    // flowing off the end of a non-void function, which is undefined behavior.
    return "?";
}

/// Streams a raw key (`size` bytes starting at `key`) to `oss` for debug logs,
/// honoring the redaction mode: "?" when redaction is enabled, the hex encoding
/// when disabled, and the hex encoding wrapped in ‹ › markers in marker mode.
///
/// The text is produced by `keyToHexString` and written as one string, so no
/// stream formatting manipulators are applied to `oss` here.
void Redact::keyToDebugString(const char * key, const size_t size, std::ostream & oss)
{
    switch (Redact::REDACT_LOG.load(std::memory_order_relaxed))
    {
    case RedactMode::Enable:
        oss << "?";
        return;
    case RedactMode::Disable:
        oss << Redact::keyToHexString(key, size);
        return;
    case RedactMode::Marker:
        // Note: the encoded key is a hexadecimal string, so it needs no escaping.
        oss << toMarkerString(Redact::keyToHexString(key, size), /*ignore_escape*/ true);
        return;
    }
}

std::string Redact::hexStringToKey(const char * start, size_t len)
Expand All @@ -92,4 +196,4 @@ std::string Redact::hexStringToKey(const char * start, size_t len)
s.push_back(x);
}
return s;
}
}
13 changes: 11 additions & 2 deletions dbms/src/Common/RedactHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,17 @@ namespace DB
class FieldVisitorToDebugString;
}

// Controls how user data is rendered by the `Redact` debug-string helpers.
enum class RedactMode
{
// User data is written to the log as-is.
Disable,
// User data is replaced by the placeholder "?".
Enable,
// User data is written wrapped between the markers `‹` and `›`.
Marker,
};

class Redact
{
public:
static void setRedactLog(bool v);
static void setRedactLog(RedactMode v);

static std::string handleToDebugString(int64_t handle);
static std::string keyToDebugString(const char * key, size_t size);
Expand All @@ -37,10 +44,12 @@ class Redact

friend class DB::FieldVisitorToDebugString;

static std::string toMarkerString(const std::string & raw, bool ignore_escape = false);

protected:
Redact() = default;

private:
// Log user data to log only when this flag is set to false.
static std::atomic<bool> REDACT_LOG;
static std::atomic<RedactMode> REDACT_LOG;
};
Loading