Skip to content

Commit

Permalink
Merge pull request #2415 from anatawa12/validate-bson-string
Browse files Browse the repository at this point in the history
chore: throw an exception when encountering an unpaired surrogate instead of replacing it with U+FFFD
  • Loading branch information
mbdavid authored Feb 13, 2024
2 parents 4e856d8 + 289e9b1 commit 31e0ff7
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 14 deletions.
4 changes: 2 additions & 2 deletions LiteDB/Document/BsonValue.cs
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ internal virtual int GetBytesCount(bool recalc)
case BsonType.Double: return 8;
case BsonType.Decimal: return 16;

case BsonType.String: return Encoding.UTF8.GetByteCount(this.AsString);
case BsonType.String: return StringEncoding.UTF8.GetByteCount(this.AsString);

case BsonType.Binary: return this.AsBinary.Length;
case BsonType.ObjectId: return 12;
Expand All @@ -674,7 +674,7 @@ protected int GetBytesCountElement(string key, BsonValue value)

return
1 + // element type
Encoding.UTF8.GetByteCount(key) + // CString
StringEncoding.UTF8.GetByteCount(key) + // CString
1 + // CString \0
value.GetBytesCount(true) +
(variant ? 5 : 0); // bytes.Length + 0x??
Expand Down
8 changes: 4 additions & 4 deletions LiteDB/Engine/Disk/Serializer/BufferReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ public string ReadString(int count)
// if fits in current segment, use inner array - otherwise copy from multiples segments
if (_currentPosition + count <= _current.Count)
{
value = Encoding.UTF8.GetString(_current.Array, _current.Offset + _currentPosition, count);
value = StringEncoding.UTF8.GetString(_current.Array, _current.Offset + _currentPosition, count);

this.MoveForward(count);
}
Expand All @@ -165,7 +165,7 @@ public string ReadString(int count)

this.Read(buffer, 0, count);

value = Encoding.UTF8.GetString(buffer, 0, count);
value = StringEncoding.UTF8.GetString(buffer, 0, count);

BufferPool.Return(buffer);
}
Expand Down Expand Up @@ -204,7 +204,7 @@ public string ReadCString()

this.MoveForward(1); // +1 to '\0'

return Encoding.UTF8.GetString(mem.ToArray());
return StringEncoding.UTF8.GetString(mem.ToArray());
}
}
}
Expand All @@ -220,7 +220,7 @@ private bool TryReadCStringCurrentSegment(out string value)
{
if (_current[pos] == 0x00)
{
value = Encoding.UTF8.GetString(_current.Array, _current.Offset + _currentPosition, count);
value = StringEncoding.UTF8.GetString(_current.Array, _current.Offset + _currentPosition, count);
this.MoveForward(count + 1); // +1 means '\0'
return true;
}
Expand Down
12 changes: 6 additions & 6 deletions LiteDB/Engine/Disk/Serializer/BufferWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -152,13 +152,13 @@ public void WriteCString(string value)
{
if (value.IndexOf('\0') > -1) throw LiteException.InvalidNullCharInString();

var bytesCount = Encoding.UTF8.GetByteCount(value);
var bytesCount = StringEncoding.UTF8.GetByteCount(value);
var available = _current.Count - _currentPosition; // avaiable in current segment

// can write direct in current segment (use < because need +1 \0)
if (bytesCount < available)
{
Encoding.UTF8.GetBytes(value, 0, value.Length, _current.Array, _current.Offset + _currentPosition);
StringEncoding.UTF8.GetBytes(value, 0, value.Length, _current.Array, _current.Offset + _currentPosition);

_current[_currentPosition + bytesCount] = 0x00;

Expand All @@ -168,7 +168,7 @@ public void WriteCString(string value)
{
var buffer = BufferPool.Rent(bytesCount);

Encoding.UTF8.GetBytes(value, 0, value.Length, buffer, 0);
StringEncoding.UTF8.GetBytes(value, 0, value.Length, buffer, 0);

this.Write(buffer, 0, bytesCount);

Expand All @@ -186,7 +186,7 @@ public void WriteCString(string value)
/// </summary>
public void WriteString(string value, bool specs)
{
var count = Encoding.UTF8.GetByteCount(value);
var count = StringEncoding.UTF8.GetByteCount(value);

if (specs)
{
Expand All @@ -195,7 +195,7 @@ public void WriteString(string value, bool specs)

if (count <= _current.Count - _currentPosition)
{
Encoding.UTF8.GetBytes(value, 0, value.Length, _current.Array, _current.Offset + _currentPosition);
StringEncoding.UTF8.GetBytes(value, 0, value.Length, _current.Array, _current.Offset + _currentPosition);

this.MoveForward(count);
}
Expand All @@ -204,7 +204,7 @@ public void WriteString(string value, bool specs)
// rent a buffer to be re-usable
var buffer = BufferPool.Rent(count);

Encoding.UTF8.GetBytes(value, 0, value.Length, buffer, 0);
StringEncoding.UTF8.GetBytes(value, 0, value.Length, buffer, 0);

this.Write(buffer, 0, count);

Expand Down
4 changes: 2 additions & 2 deletions LiteDB/Engine/Structures/CollectionIndex.cs
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ public static int GetLength(string name, string expr)
return
1 + // Slot
1 + // IndexType
Encoding.UTF8.GetByteCount(name) + 1 + // Name + \0
Encoding.UTF8.GetByteCount(expr) + 1 + // Expression + \0
StringEncoding.UTF8.GetByteCount(name) + 1 + // Name + \0
StringEncoding.UTF8.GetByteCount(expr) + 1 + // Expression + \0
1 + // Unique
PageAddress.SIZE + // Head
PageAddress.SIZE + // Tail
Expand Down
12 changes: 12 additions & 0 deletions LiteDB/Utils/Encoding.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
using System.Text;

namespace LiteDB
{
    /// <summary>
    /// Holds the text encoding used when strings are converted to and from bytes for storage.
    /// </summary>
    internal static class StringEncoding
    {
        /// <summary>
        /// Strict UTF-8 encoding: no byte-order mark is emitted and invalid input raises an exception.
        /// </summary>
        /// <remarks>
        /// The framework's <see cref="Encoding.UTF8"/> silently replaces an unpaired surrogate with U+FFFD,
        /// which is not suitable for a database because the stored value would differ from the value
        /// that was written. This instance is created with <c>new UTF8Encoding(false, true)</c> so that
        /// an exception is thrown instead. The field is <c>readonly</c> so the strict instance cannot be
        /// swapped for a lenient one at runtime.
        /// </remarks>
        public static readonly Encoding UTF8 = new UTF8Encoding(false, true);
    }
}

0 comments on commit 31e0ff7

Please sign in to comment.