Skip to content

Commit

Permalink
Merge pull request #48 from BDisp/column-width
Browse files Browse the repository at this point in the history
Fixes #47. ColumnWidth needs to differentiate between non-printable and null characters.
  • Loading branch information
migueldeicaza authored Oct 30, 2020
2 parents 447f61d + 858a8db commit a9b2986
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 18 deletions.
18 changes: 8 additions & 10 deletions NStack/unicode/Rune.ColumnWidth.cs
Original file line number Diff line number Diff line change
Expand Up @@ -79,20 +79,18 @@ static int bisearch (uint rune, uint [,] table, int max)
return 0;
}

/// <summary>
/// Number of column positions of a wide-character code. This is used to measure runes as displayed by text-based terminals.
/// </summary>
/// <returns>The width in columns, 0 if the argument is the null character, -1 if the value is not printable, otherwise the number of columsn that the rune occupies.</returns>
/// <param name="r">The red component.</param>
public static int ColumnWidth (Rune rune)
/// <summary>
/// Number of column positions of a wide-character code. This is used to measure runes as displayed by text-based terminals.
/// </summary>
/// <returns>The width in columns, 0 if the argument is the null character, -1 if the value is not printable, otherwise the number of columns that the rune occupies.</returns>
/// <param name="rune">The rune.</param>
public static int ColumnWidth (Rune rune)
{
uint irune = (uint)rune;
if (irune < 32)
return 0;
if (irune < 32 || (irune >= 0x7f && irune <= 0xa0))
return -1;
if (irune < 127)
return 1;
if (irune >= 0x7f && irune <= 0xa0)
return 0;
/* binary search in table of non-spacing characters */
if (bisearch (irune, combining, combining.GetLength (0)-1) != 0)
return 0;
Expand Down
37 changes: 34 additions & 3 deletions NStack/unicode/Rune.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,37 @@ public Rune (uint rune)
/// <param name="ch">C# characters.</param>
/// <exception cref="ArgumentException"><paramref name="ch"/> lies in the surrogate range and therefore cannot stand alone.</exception>
public Rune (char ch)
{
	// A single UTF-16 code unit in the surrogate range is only half of a pair,
	// so it cannot be turned into a rune by itself.
	bool isLoneSurrogate = surrogateMin <= ch && ch <= surrogateMax;
	if (isLoneSurrogate)
		throw new ArgumentException("Value in the surrogate range and isn't part of a surrogate pair!");

	this.value = ch;
}

/// <summary>
/// Initializes a new instance of the <see cref="T:System.Rune"/> from a UTF-16 surrogate pair.
/// </summary>
/// <param name="sgateMin">The high (leading) surrogate of the pair; must lie between surrogateMin and highSurrogateMax inclusive.</param>
/// <param name="sgateMax">The low (trailing) surrogate of the pair; must lie between lowSurrogateMin and surrogateMax inclusive.</param>
/// <exception cref="ArgumentOutOfRangeException">Either argument is outside the range allowed for its half of the pair.</exception>
public Rune (uint sgateMin, uint sgateMax)
{
	// Each half is validated against its own sub-range. Checking only the outer
	// bounds would accept swapped or unpaired surrogates, and the unsigned
	// subtraction in DecodeSurrogatePair would then wrap around silently.
	if (sgateMin < surrogateMin || sgateMin > highSurrogateMax)
	{
		throw new ArgumentOutOfRangeException (nameof (sgateMin), $"Must be between {surrogateMin:x} and {highSurrogateMax:x} inclusive!");
	}
	if (sgateMax < lowSurrogateMin || sgateMax > surrogateMax)
	{
		throw new ArgumentOutOfRangeException (nameof (sgateMax), $"Must be between {lowSurrogateMin:x} and {surrogateMax:x} inclusive!");
	}
	this.value = DecodeSurrogatePair (sgateMin, sgateMax);
}

/// <summary>
/// Combines the two halves of a UTF-16 surrogate pair into a single code point value.
/// </summary>
/// <param name="sgateMin">The high (leading) surrogate of the pair.</param>
/// <param name="sgateMax">The low (trailing) surrogate of the pair.</param>
/// <returns>
/// 0x10000 plus the high surrogate's offset from surrogateMin multiplied by 0x400,
/// plus the low surrogate's offset from lowSurrogateMin. The arguments are not
/// range-checked here.
/// </returns>
public static uint DecodeSurrogatePair(uint sgateMin, uint sgateMax)
{
	uint highOffset = sgateMin - surrogateMin;
	uint lowOffset = sgateMax - lowSurrogateMin;
	return 0x10000 + (highOffset * 0x0400) + lowOffset;
}

/// <summary>
/// Gets a value indicating whether this <see cref="T:System.Rune"/> can be encoded as UTF-8
/// </summary>
Expand All @@ -79,6 +107,9 @@ public bool IsValid {
const uint surrogateMin = 0xd800;
const uint surrogateMax = 0xdfff;

const uint highSurrogateMax = 0xdbff;
const uint lowSurrogateMin = 0xdc00;

const byte t1 = 0x00; // 0000 0000
const byte tx = 0x80; // 1000 0000
const byte t2 = 0xC0; // 1100 0000
Expand Down Expand Up @@ -305,7 +336,7 @@ public static (Rune rune, int size) DecodeLastRune (byte [] buffer, int end = -1
/// number of bytes required to encode the rune.
/// </summary>
/// <returns>The length, or -1 if the rune is not a valid value to encode in UTF-8.</returns>
/// <param name="rvalue">Rune to probe.</param>
/// <param name="rune">Rune to probe.</param>
public static int RuneLen (Rune rune)
{
var rvalue = rune.value;
Expand Down Expand Up @@ -771,8 +802,8 @@ public override int GetHashCode ()
public override string ToString ()
{
	// Scratch buffer for the UTF-8 encoding of this rune.
	var buff = new byte [4];
	// Keep the value returned by EncodeRune and decode only that many bytes.
	// Decoding the entire zero-initialized 4-byte buffer would append NUL
	// characters to the result whenever the encoding is shorter than 4 bytes.
	var size = EncodeRune (this, buff, 0);
	return System.Text.Encoding.UTF8.GetString (buff, 0, size);
}

/// <summary>
Expand Down
46 changes: 41 additions & 5 deletions NStackTests/RuneTest.cs
Original file line number Diff line number Diff line change
@@ -1,12 +1,48 @@
using System;
using NUnit.Framework;
using System;
namespace NStackTests {
	/// <summary>
	/// Unit tests for Rune: column-width reporting, construction from chars,
	/// code points and surrogate pairs, and string conversion.
	/// </summary>
	public class RuneTest {
		Rune a = 'a';
		Rune b = 'b';
		Rune c = 123;
		Rune d = '\u1150'; // 0x1150 ᅐ Unicode Technical Report #11
		Rune e = '\u1161'; // 0x1161 ᅡ character expected to report a column width of 0
		Rune f = 31; // non printable character
		Rune g = 127; // non printable character

		/// <summary>
		/// Checks that ColumnWidth reports 1 for ordinary characters, 2 for a wide one,
		/// 0 for a zero-width one and -1 for non-printable ones.
		/// </summary>
		[Test]
		public void TestColumnWidth()
		{
			var rt = new RuneTest();

			Assert.AreEqual(1, Rune.ColumnWidth(rt.a));
			Assert.AreEqual(1, Rune.ColumnWidth(rt.b));
			var l = a < b;
			Assert.IsTrue(l);
			Assert.AreEqual(1, Rune.ColumnWidth(rt.c));
			Assert.AreEqual(2, Rune.ColumnWidth(rt.d));
			Assert.AreEqual(0, Rune.ColumnWidth(rt.e));
			Assert.AreEqual(-1, Rune.ColumnWidth(rt.f));
			Assert.AreEqual(-1, Rune.ColumnWidth(rt.g));
		}

		/// <summary>
		/// Checks rune construction from a char, a code point and a surrogate pair,
		/// the exceptions raised for invalid surrogates, and the ToString output.
		/// </summary>
		[Test]
		public void TestRune()
		{
			Rune a = new Rune('a');
			Assert.AreEqual("a", a.ToString());
			Rune b = new Rune(0x0061);
			Assert.AreEqual("a", b.ToString());
			Rune c = new Rune('\u0061');
			Assert.AreEqual("a", c.ToString());
			Rune d = new Rune(0x10421);
			Assert.AreEqual("𐐡", d.ToString());
			// First value is below the surrogate range, so the pair constructor must reject it.
			Assert.Throws<ArgumentOutOfRangeException>(() => new Rune('\ud799', '\udc21'));
			Rune e = new Rune('\ud801', '\udc21');
			Assert.AreEqual("𐐡", e.ToString());
			// A lone surrogate char cannot form a rune on its own.
			Assert.Throws<ArgumentException>(() => new Rune('\ud801'));
			Rune f = new Rune('\ud83c', '\udf39');
			Assert.AreEqual("🌹", f.ToString());
		}
	}
}

0 comments on commit a9b2986

Please sign in to comment.