From 931b70d5ed7a3dc75a56a86357cf18cbd9824d6b Mon Sep 17 00:00:00 2001 From: Kevin Park Date: Thu, 8 Jun 2023 09:14:10 +0900 Subject: [PATCH] Support utf16 code units for yorkie.Tree (#545) --- api/converter/converter_test.go | 10 ++++++++++ pkg/document/crdt/tree.go | 15 +++++++++------ pkg/document/crdt/tree_test.go | 22 ++++++++++++++++++++++ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/api/converter/converter_test.go b/api/converter/converter_test.go index ca7c5b76c..c247913a0 100644 --- a/api/converter/converter_test.go +++ b/api/converter/converter_test.go @@ -113,6 +113,16 @@ func TestConverter(t *testing.T) { Increase(10). Increase(math.MaxInt64) + // tree + root.SetNewTree("k5"). + Edit(0, 0, &json.TreeNode{ + Type: "p", + Children: []json.TreeNode{{ + Type: "text", + Value: "Hello world", + }}, + }) + return nil }) assert.NoError(t, err) diff --git a/pkg/document/crdt/tree.go b/pkg/document/crdt/tree.go index b5598d0c9..5a439ee5e 100644 --- a/pkg/document/crdt/tree.go +++ b/pkg/document/crdt/tree.go @@ -20,6 +20,7 @@ import ( "errors" "fmt" "strings" + "unicode/utf16" "github.com/yorkie-team/yorkie/pkg/document/time" "github.com/yorkie-team/yorkie/pkg/index" @@ -127,7 +128,8 @@ func (n *TreeNode) IsRemoved() bool { // Length returns the length of this node. func (n *TreeNode) Length() int { - return len(n.Value) + encoded := utf16.Encode([]rune(n.Value)) + return len(encoded) } // String returns the string representation of this node. 
@@ -175,16 +177,17 @@ func (n *TreeNode) SplitText(offset int) *TreeNode {
 		return nil
 	}
 
-	leftValue := n.Value[:offset]
-	rightValue := n.Value[offset:]
+	encoded := utf16.Encode([]rune(n.Value))
+	leftRune := utf16.Decode(encoded[0:offset])
+	rightRune := utf16.Decode(encoded[offset:])
 
-	n.Value = leftValue
-	n.IndexTreeNode.Length = len(leftValue)
+	n.Value = string(leftRune)
+	n.IndexTreeNode.Length = offset // UTF-16 code units kept on the left (matches Length()), not the rune count
 
 	rightNode := NewTreeNode(&TreePos{
 		CreatedAt: n.Pos.CreatedAt,
 		Offset:    offset,
-	}, n.Type(), rightValue)
+	}, n.Type(), string(rightRune))
 	n.IndexTreeNode.Parent.InsertAfterInternal(rightNode.IndexTreeNode, n.IndexTreeNode)
 
 	return rightNode
diff --git a/pkg/document/crdt/tree_test.go b/pkg/document/crdt/tree_test.go
index 2b5fa9eb9..efbbacae9 100644
--- a/pkg/document/crdt/tree_test.go
+++ b/pkg/document/crdt/tree_test.go
@@ -54,6 +54,28 @@ func TestTreeNode(t *testing.T) {
 		assert.Equal(t, &crdt.TreePos{CreatedAt: time.InitialTicket, Offset: 0}, left.Pos)
 		assert.Equal(t, &crdt.TreePos{CreatedAt: time.InitialTicket, Offset: 5}, right.Pos)
 	})
+
+	t.Run("UTF-16 code unit test", func(t *testing.T) {
+		tests := []struct {
+			length int
+			value  string
+		}{
+			{4, "abcd"},
+			{6, "우리나라한글"},
+			{8, "अनुच्छेद"},
+			{10, "Ĺo͂řȩm̅"},
+			{12, "🌷🎁💩😜👍🏳"},
+		}
+		for _, test := range tests {
+			para := crdt.NewTreeNode(crdt.DummyTreePos, "p")
+			para.Append(crdt.NewTreeNode(crdt.DummyTreePos, "text", test.value))
+
+			left := para.Child(0)
+			assert.Equal(t, test.length, left.Len())
+			right := left.Split(2)
+			assert.Equal(t, test.length-2, right.Len())
+		}
+	})
 }
 
 func TestTree(t *testing.T) {