Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

new package Encoding #1236

Draft
wants to merge 25 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 69 additions & 8 deletions buffer/buffer.mbt
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,16 @@ pub fn to_string(self : T) -> String {
/// Return a new unchecked string containing the data in the buffer.
/// Note that this function does not validate the encoding of the byte sequence;
/// it simply copies the bytes into a new String.
pub fn to_unchecked_string(self : T) -> String {
Bytes::from_fixedarray(self.data).to_unchecked_string(
offset=0,
length=self.len,
)
/// Build a `String` from the raw bytes held by the buffer without validating
/// their encoding.
///
/// - `offset`: byte index at which to start; defaults to `0`.
/// - `length`: number of bytes to take; defaults to everything from `offset`
///   up to the written length of the buffer.
pub fn to_unchecked_string(self : T, offset? : Int, length? : Int) -> String {
  let offset = match offset {
    None => 0
    Some(x) => x
  }
  let length = match length {
    // The default must shrink with `offset`; using `self.len` unchanged would
    // make the range `offset .. offset + self.len` run past the written data.
    None => self.len - offset
    Some(x) => x
  }
  Bytes::from_fixedarray(self.data).to_unchecked_string(offset~, length~)
}

///|
Expand All @@ -99,6 +104,22 @@ pub fn T::new(size_hint~ : Int = 0) -> T {
{ data, len: 0, initial_data: data }
}

///|
/// Create a buffer pre-filled with the content of `bytes`.
/// The byte count of `bytes` is passed as the size hint of the new buffer.
pub fn T::from_bytes(bytes : Bytes) -> T {
  let result = T::new(size_hint=bytes.length())
  result.write_bytes(bytes)
  result
}

///|
/// Create a buffer holding the bytes of `arr`, in order.
/// The element count of `arr` is passed as the size hint of the new buffer.
pub fn T::from_array(arr : Array[Byte]) -> T {
  let result = T::new(size_hint=arr.length())
  for i = 0; i < arr.length(); i = i + 1 {
    result.write_byte(arr[i])
  }
  result
}

///|
/// Write a string into buffer.
pub fn write_string(self : T, value : String) -> Unit {
Expand Down Expand Up @@ -147,10 +168,26 @@ pub fn write_sub_string(
}

///|
/// Write a char into the buffer, encoded as UTF16LE.
/// The buffer length advances by the byte count returned by the encoder.
pub fn write_char(self : T, value : Char) -> Unit {
// Request room for 4 more bytes before encoding at `self.len`.
self.grow_if_necessary(self.len + 4)
// NOTE(review): the two `let inc` lines below look like the before/after
// sides of a diff hunk; presumably only the `set_utf16le_char` call is meant
// to remain — confirm against the merged source.
let inc = self.data.set_utf16_char(self.len, value)
let inc = self.data.set_utf16le_char(self.len, value)
self.len += inc
}

///|
/// Append `value` to the buffer in UTF16BE encoding.
/// The buffer length advances by the byte count returned by the encoder.
pub fn write_utf16be_char(self : T, value : Char) -> Unit {
  // Request room for 4 more bytes before encoding at `self.len`.
  self.grow_if_necessary(self.len + 4)
  self.len += self.data.set_utf16be_char(self.len, value)
}

///|
/// Append `value` to the buffer in UTF8 encoding.
/// The buffer length advances by the byte count returned by the encoder.
pub fn write_utf8_char(self : T, value : Char) -> Unit {
  // Request room for 4 more bytes before encoding at `self.len`.
  self.grow_if_necessary(self.len + 4)
  self.len += self.data.set_utf8_char(self.len, value)
}

Expand All @@ -162,6 +199,11 @@ pub fn write_byte(self : T, value : Byte) -> Unit {
self.len += 1
}

///|
/// Copy `len` bytes from `self`, starting at `srcoff`, into `dst`, starting
/// at `dstoff`.
///
/// Both ranges must lie inside the written part of their buffer
/// (`0 .. self.len` and `0 .. dst.len`); the guards below reject anything else.
/// The length of `dst` is not changed.
pub fn blit(self : T, srcoff : Int, dst : T, dstoff : Int, len : Int) -> Unit {
  guard len >= 0
  guard srcoff >= 0 && srcoff + len <= self.len
  guard dstoff >= 0 && dstoff + len <= dst.len
  // Copy between the backing arrays directly. `to_bytes()` builds a fresh
  // `Bytes` value, so blitting into `dst.to_bytes()` only modified a
  // temporary and left `dst` untouched.
  FixedArray::unsafe_blit(dst.data, dstoff, self.data, srcoff, len)
}

///|
pub fn reset(self : T) -> Unit {
self.data = self.initial_data
Expand All @@ -173,7 +215,26 @@ pub fn to_bytes(self : T) -> Bytes {
Bytes::from_fixedarray(self.data, len=self.len)
}

///|
/// Return the buffer content as an `Array[Byte]`, going through `to_bytes`.
pub fn to_array(self : T) -> Array[Byte] {
  let snapshot = self.to_bytes()
  snapshot.to_array()
}

///|
/// Overwrite the byte at `index` with `value`.
/// `index` must satisfy `0 <= index < self.length()`.
pub fn op_set(self : T, index : Int, value : Byte) -> Unit {
  guard 0 <= index && index < self.length()
  self.data[index] = value
}

///|
/// Read the byte stored at `index`.
/// `index` must satisfy `0 <= index < self.length()`.
pub fn op_get(self : T, index : Int) -> Byte {
  guard 0 <= index && index < self.length()
  self.data[index]
}
jetjinser marked this conversation as resolved.
Show resolved Hide resolved

///|
/// `Show` implementation for the buffer: the content is converted with
/// `to_unchecked_string` and written to `logger`.
pub impl Show for T with output(self, logger) {
// NOTE(review): the two calls below look like the before/after sides of a
// diff hunk; presumably only the call with explicit `offset`/`length` is
// meant to remain — confirm against the merged source.
logger.write_string(self.to_unchecked_string())
logger.write_string(self.to_unchecked_string(offset=0, length=self.len))
}
10 changes: 9 additions & 1 deletion buffer/buffer.mbti
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,28 @@ package moonbitlang/core/buffer
// Types and methods
type T
impl T {
blit(Self, Int, Self, Int, Int) -> Unit
from_array(Array[Byte]) -> Self
from_bytes(Bytes) -> Self
is_empty(Self) -> Bool
length(Self) -> Int
new(size_hint~ : Int = ..) -> Self
op_get(Self, Int) -> Byte
op_set(Self, Int, Byte) -> Unit
reset(Self) -> Unit
to_array(Self) -> Array[Byte]
to_bytes(Self) -> Bytes
to_string(Self) -> String //deprecated
to_unchecked_string(Self) -> String
to_unchecked_string(Self, offset? : Int, length? : Int) -> String
write_byte(Self, Byte) -> Unit
write_bytes(Self, Bytes) -> Unit
write_char(Self, Char) -> Unit
write_object(Self, Show) -> Unit
write_string(Self, String) -> Unit
write_sub_string(Self, String, Int, Int) -> Unit //deprecated
write_substring(Self, String, Int, Int) -> Unit
write_utf16be_char(Self, Char) -> Unit
write_utf8_char(Self, Char) -> Unit
}
impl Show for T

Expand Down
8 changes: 6 additions & 2 deletions builtin/builtin.mbti
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ impl Iter {
tap[T](Self[T], (T) -> Unit) -> Self[T] //deprecated
to_array[T](Self[T]) -> Array[T]
to_string[T : Show](Self[T]) -> String
try_collect[T, E : Error](Self[Result[T, E]]) -> Array[T]!E
}
impl[T : Show] Show for Iter[T]

Expand Down Expand Up @@ -667,7 +668,10 @@ impl FixedArray {
op_get[T](Self[T], Int) -> T
op_set[T](Self[T], Int, T) -> Unit
set[T](Self[T], Int, T) -> Unit
set_utf16_char(Self[Byte], Int, Char) -> Int
set_utf16_char(Self[Byte], Int, Char) -> Int //deprecated
set_utf16be_char(Self[Byte], Int, Char) -> Int
set_utf16le_char(Self[Byte], Int, Char) -> Int
set_utf8_char(Self[Byte], Int, Char) -> Int
to_json[X : ToJson](Self[X]) -> Json
to_string[X : Show](Self[X]) -> String
unsafe_blit[A](Self[A], Int, Self[A], Int, Int) -> Unit
Expand All @@ -685,7 +689,7 @@ impl Bytes {
op_equal(Bytes, Bytes) -> Bool
op_get(Bytes, Int) -> Byte
op_set(Bytes, Int, Byte) -> Unit
set_utf16_char(Bytes, Int, Char) -> Int
set_utf16_char(Bytes, Int, Char) -> Int //deprecated
set_utf8_char(Bytes, Int, Char) -> Int //deprecated
sub_string(Bytes, Int, Int) -> String //deprecated
to_string(Bytes) -> String //deprecated
Expand Down
92 changes: 90 additions & 2 deletions builtin/bytes.mbt
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ pub fn copy(self : Bytes) -> Bytes {
}

///|
/// Fill utf8 encoded char `value` into byte sequence `self`, starting at `offset`.
/// Fill UTF8 encoded char `value` into byte sequence `self`, starting at `offset`.
/// It returns the number of bytes written.
/// @alert deprecated "The type Bytes is about to be changed to be immutable. Use `FixedArray[Byte]` or `Buffer` instead."
pub fn set_utf8_char(self : Bytes, offset : Int, value : Char) -> Int {
Expand Down Expand Up @@ -169,9 +169,40 @@ pub fn set_utf8_char(self : Bytes, offset : Int, value : Char) -> Int {
}

///|
/// Encode `value` as UTF8 into `self`, beginning at index `offset`.
/// Returns the number of bytes written (1 to 4).
/// Aborts with "Char out of range" when the code point is `0x110000` or above.
pub fn set_utf8_char(
  self : FixedArray[Byte],
  offset : Int,
  value : Char
) -> Int {
  let code = value.to_uint()
  // Continuation byte `10xxxxxx` carrying the six bits that start at `shift`.
  fn cont(shift : Int) -> Byte {
    (((code >> shift) & 0x3F) | 0x80).to_byte()
  }

  if code < 0x80 {
    // Single byte: 0xxxxxxx
    self[offset] = (code & 0x7F).to_byte()
    1
  } else if code < 0x0800 {
    // Two bytes: 110xxxxx 10xxxxxx
    self[offset] = (((code >> 6) & 0x1F) | 0xC0).to_byte()
    self[offset + 1] = cont(0)
    2
  } else if code < 0x010000 {
    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
    self[offset] = (((code >> 12) & 0x0F) | 0xE0).to_byte()
    self[offset + 1] = cont(6)
    self[offset + 2] = cont(0)
    3
  } else if code < 0x110000 {
    // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    self[offset] = (((code >> 18) & 0x07) | 0xF0).to_byte()
    self[offset + 1] = cont(12)
    self[offset + 2] = cont(6)
    self[offset + 3] = cont(0)
    4
  } else {
    abort("Char out of range")
  }
}

///|
/// Fill UTF16 encoded char `value` into byte sequence `self`, starting at `offset`.
/// It returns the number of bytes written.
/// @alert unsafe "Panic if the [value] is out of range"
/// @alert deprecated "The type Bytes is about to be changed to be immutable. Use `FixedArray[Byte]` or `Buffer` instead."
pub fn set_utf16_char(self : Bytes, offset : Int, value : Char) -> Int {
let code = value.to_uint()
if code < 0x10000 {
Expand All @@ -196,6 +227,7 @@ pub fn set_utf16_char(self : Bytes, offset : Int, value : Char) -> Int {
/// Fill utf16 encoded char `value` into byte sequence `self`, starting at `offset`.
/// It returns the number of bytes written.
/// @alert unsafe "Panic if the [value] is out of range"
/// @alert deprecated "Use `set_utf16le_char` instead"
pub fn set_utf16_char(
self : FixedArray[Byte],
offset : Int,
Expand All @@ -220,6 +252,62 @@ pub fn set_utf16_char(
}
}

///|
/// Encode `value` as UTF16LE into `self`, beginning at index `offset`.
/// Returns the number of bytes written: 2 for a code point below `0x10000`,
/// 4 for a code point encoded as a surrogate pair.
/// @alert unsafe "Panic if the [value] is out of range"
pub fn set_utf16le_char(
  self : FixedArray[Byte],
  offset : Int,
  value : Char
) -> Int {
  let code = value.to_uint()
  if code >= 0x110000 {
    abort("Char out of range")
  } else if code >= 0x10000 {
    // Split the 20-bit value into a high and a low surrogate.
    let scalar = code - 0x10000
    let high = (scalar >> 10) | 0xD800
    let low = (scalar & 0x3FF) | 0xDC00
    // Each 16-bit unit is stored least-significant byte first.
    self[offset] = (high & 0xFF).to_byte()
    self[offset + 1] = (high >> 8).to_byte()
    self[offset + 2] = (low & 0xFF).to_byte()
    self[offset + 3] = (low >> 8).to_byte()
    4
  } else {
    self[offset] = (code & 0xFF).to_byte()
    self[offset + 1] = (code >> 8).to_byte()
    2
  }
}

///|
/// Encode `value` as UTF16BE into `self`, beginning at index `offset`.
/// Returns the number of bytes written: 2 for a code point below `0x10000`,
/// 4 for a code point encoded as a surrogate pair.
/// @alert unsafe "Panic if the [value] is out of range"
pub fn set_utf16be_char(
  self : FixedArray[Byte],
  offset : Int,
  value : Char
) -> Int {
  let code = value.to_uint()
  if code < 0x10000 {
    // Most-significant byte first. The shift must be 8 bits; the previous
    // `code >> 0xFF` never produced the high byte.
    self[offset] = (code >> 8).to_byte()
    self[offset + 1] = (code & 0xFF).to_byte()
    2
  } else if code < 0x110000 {
    // Split the 20-bit value into a high and a low surrogate.
    let scalar = code - 0x10000
    let high = (scalar >> 10) | 0xD800
    let low = (scalar & 0x3FF) | 0xDC00
    self[offset] = (high >> 8).to_byte()
    self[offset + 1] = (high & 0xFF).to_byte()
    self[offset + 2] = (low >> 8).to_byte()
    self[offset + 3] = (low & 0xFF).to_byte()
    4
  } else {
    abort("Char out of range")
  }
}

///|
pub fn op_equal(self : Bytes, other : Bytes) -> Bool {
if self.length() != other.length() {
Expand Down
13 changes: 13 additions & 0 deletions builtin/iter.mbt
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,19 @@ pub fn collect[T](self : Iter[T]) -> Array[T] {
result
}

///|
/// Collects the `Ok` payloads of the iterator into an array, in iteration order.
/// Raises the error of the first `Err` element encountered.
pub fn try_collect[T, E : Error](self : Iter[Result[T, E]]) -> Array[T]!E {
  let collected = []
  for item in self {
    match item {
      Err(err) => raise err
      Ok(value) => collected.push(value)
    }
  }
  collected
}

///|
/// Iter itself is an iterator.
/// so that it works with array spread operator. e.g, `[..iter]`
Expand Down
Loading
Loading