Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: throw an exception when encountering an unpaired surrogate instead of replacing it with U+FFFD #2415

Merged
merged 1 commit into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions LiteDB/Document/BsonValue.cs
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ internal virtual int GetBytesCount(bool recalc)
case BsonType.Double: return 8;
case BsonType.Decimal: return 16;

case BsonType.String: return Encoding.UTF8.GetByteCount(this.AsString);
case BsonType.String: return StringEncoding.UTF8.GetByteCount(this.AsString);

case BsonType.Binary: return this.AsBinary.Length;
case BsonType.ObjectId: return 12;
Expand All @@ -674,7 +674,7 @@ protected int GetBytesCountElement(string key, BsonValue value)

return
1 + // element type
Encoding.UTF8.GetByteCount(key) + // CString
StringEncoding.UTF8.GetByteCount(key) + // CString
1 + // CString \0
value.GetBytesCount(true) +
(variant ? 5 : 0); // bytes.Length + 0x??
Expand Down
8 changes: 4 additions & 4 deletions LiteDB/Engine/Disk/Serializer/BufferReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ public string ReadString(int count)
// if it fits in the current segment, use the inner array - otherwise copy from multiple segments
if (_currentPosition + count <= _current.Count)
{
value = Encoding.UTF8.GetString(_current.Array, _current.Offset + _currentPosition, count);
value = StringEncoding.UTF8.GetString(_current.Array, _current.Offset + _currentPosition, count);

this.MoveForward(count);
}
Expand All @@ -165,7 +165,7 @@ public string ReadString(int count)

this.Read(buffer, 0, count);

value = Encoding.UTF8.GetString(buffer, 0, count);
value = StringEncoding.UTF8.GetString(buffer, 0, count);

BufferPool.Return(buffer);
}
Expand Down Expand Up @@ -204,7 +204,7 @@ public string ReadCString()

this.MoveForward(1); // +1 to '\0'

return Encoding.UTF8.GetString(mem.ToArray());
return StringEncoding.UTF8.GetString(mem.ToArray());
}
}
}
Expand All @@ -220,7 +220,7 @@ private bool TryReadCStringCurrentSegment(out string value)
{
if (_current[pos] == 0x00)
{
value = Encoding.UTF8.GetString(_current.Array, _current.Offset + _currentPosition, count);
value = StringEncoding.UTF8.GetString(_current.Array, _current.Offset + _currentPosition, count);
this.MoveForward(count + 1); // +1 means '\0'
return true;
}
Expand Down
12 changes: 6 additions & 6 deletions LiteDB/Engine/Disk/Serializer/BufferWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -152,13 +152,13 @@ public void WriteCString(string value)
{
if (value.IndexOf('\0') > -1) throw LiteException.InvalidNullCharInString();

var bytesCount = Encoding.UTF8.GetByteCount(value);
var bytesCount = StringEncoding.UTF8.GetByteCount(value);
var available = _current.Count - _currentPosition; // avaiable in current segment

// can write direct in current segment (use < because need +1 \0)
if (bytesCount < available)
{
Encoding.UTF8.GetBytes(value, 0, value.Length, _current.Array, _current.Offset + _currentPosition);
StringEncoding.UTF8.GetBytes(value, 0, value.Length, _current.Array, _current.Offset + _currentPosition);

_current[_currentPosition + bytesCount] = 0x00;

Expand All @@ -168,7 +168,7 @@ public void WriteCString(string value)
{
var buffer = BufferPool.Rent(bytesCount);

Encoding.UTF8.GetBytes(value, 0, value.Length, buffer, 0);
StringEncoding.UTF8.GetBytes(value, 0, value.Length, buffer, 0);

this.Write(buffer, 0, bytesCount);

Expand All @@ -186,7 +186,7 @@ public void WriteCString(string value)
/// </summary>
public void WriteString(string value, bool specs)
{
var count = Encoding.UTF8.GetByteCount(value);
var count = StringEncoding.UTF8.GetByteCount(value);

if (specs)
{
Expand All @@ -195,7 +195,7 @@ public void WriteString(string value, bool specs)

if (count <= _current.Count - _currentPosition)
{
Encoding.UTF8.GetBytes(value, 0, value.Length, _current.Array, _current.Offset + _currentPosition);
StringEncoding.UTF8.GetBytes(value, 0, value.Length, _current.Array, _current.Offset + _currentPosition);

this.MoveForward(count);
}
Expand All @@ -204,7 +204,7 @@ public void WriteString(string value, bool specs)
// rent a buffer to be re-usable
var buffer = BufferPool.Rent(count);

Encoding.UTF8.GetBytes(value, 0, value.Length, buffer, 0);
StringEncoding.UTF8.GetBytes(value, 0, value.Length, buffer, 0);

this.Write(buffer, 0, count);

Expand Down
4 changes: 2 additions & 2 deletions LiteDB/Engine/Structures/CollectionIndex.cs
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ public static int GetLength(string name, string expr)
return
1 + // Slot
1 + // IndexType
Encoding.UTF8.GetByteCount(name) + 1 + // Name + \0
Encoding.UTF8.GetByteCount(expr) + 1 + // Expression + \0
StringEncoding.UTF8.GetByteCount(name) + 1 + // Name + \0
StringEncoding.UTF8.GetByteCount(expr) + 1 + // Expression + \0
1 + // Unique
PageAddress.SIZE + // Head
PageAddress.SIZE + // Tail
Expand Down
12 changes: 12 additions & 0 deletions LiteDB/Utils/Encoding.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
using System.Text;

namespace LiteDB
{
    /// <summary>
    /// Holds the strict UTF-8 encoding instance exposed to the rest of the assembly.
    /// </summary>
    /// <remarks>
    /// The stock <c>Encoding.UTF8</c> instance replaces an unpaired surrogate with U+FFFD,
    /// which is not suitable for a database because the stored text would silently differ
    /// from what the caller supplied. The instance exposed here throws instead.
    /// </remarks>
    internal static class StringEncoding
    {
        /// <summary>
        /// UTF-8 encoding that emits no byte-order mark and throws on invalid input
        /// (such as an unpaired surrogate) rather than substituting U+FFFD.
        /// </summary>
        /// <remarks>
        /// Declared <c>readonly</c> so the shared instance cannot be swapped out at runtime.
        /// </remarks>
        public static readonly Encoding UTF8 = new UTF8Encoding(
            encoderShouldEmitUTF8Identifier: false,
            throwOnInvalidBytes: true);
    }
}