-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5ea501d
commit 544801b
Showing
8 changed files
with
85 additions
and
35 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# This file is a part of Julia. License is MIT: http://julialang.org/license | ||
|
||
#=#! | ||
@brief Error messages for Unicode / UTF support | ||
=# | ||
|
||
# Message templates for UnicodeError. When the error is shown, "<<1>>" is
# replaced by the error position and "<<2>>" by the offending character in
# hexadecimal. Templates without placeholders are printed unchanged.

# Malformed UTF-8 sequences
const UTF_ERR_SHORT = "invalid UTF-8 sequence starting at index <<1>> (0x<<2>> missing one or more continuation bytes)"
const UTF_ERR_CONT = "invalid UTF-8 sequence starting at index <<1>> (0x<<2>> is not a continuation byte)"
const UTF_ERR_LONG = "invalid UTF-8 sequence, overlong encoding starting at index <<1>> (0x<<2>>)"

# Surrogate code unit problems
const UTF_ERR_NOT_LEAD = "not a leading Unicode surrogate code unit at index <<1>> (0x<<2>>)"
const UTF_ERR_NOT_TRAIL = "not a trailing Unicode surrogate code unit at index <<1>> (0x<<2>>)"
const UTF_ERR_NOT_SURROGATE = "not a valid Unicode surrogate code unit at index <<1>> (0x<<2>>)"
const UTF_ERR_MISSING_SURROGATE = "missing trailing Unicode surrogate code unit after index <<1>> (0x<<2>>)"

# Invalid code points
const UTF_ERR_INVALID = "invalid Unicode character starting at index <<1>> (0x<<2>> > 0x10ffff)"
const UTF_ERR_SURROGATE = "surrogate encoding not allowed in UTF-8 or UTF-32, at index <<1>> (0x<<2>>)"

# Malformed raw data passed to the UTF16String / UTF32String constructors
const UTF_ERR_NULL_16_TERMINATE = "UTF16String data must be NULL-terminated"
const UTF_ERR_NULL_32_TERMINATE = "UTF32String data must be NULL-terminated"
const UTF_ERR_ODD_BYTES_16 = "UTF16String can't have odd number of bytes <<1>>"
const UTF_ERR_ODD_BYTES_32 = "UTF32String must have multiple of 4 bytes <<1>>"

# General-purpose messages
const UTF_ERR_INVALID_CHAR = "invalid Unicode character (0x<<2>>)"
const UTF_ERR_INVALID_8 = "invalid UTF-8 data"
const UTF_ERR_INVALID_16 = "invalid UTF-16 data"
const UTF_ERR_INVALID_INDEX = "invalid character index"
const UTF_ERR_MAP_CHAR = "map(f,s::AbstractString) requires f to return Char; try map(f,collect(s)) or a comprehension instead"
|
||
#=#!
@brief Exception carrying a message template together with the position and value of the offending character
=#
type UnicodeError <: Exception
    errmsg::AbstractString  #< One of the UTF_ERR_ message templates
    errpos::Int32           #< Position of the invalid character
    errchr::UInt32          #< The invalid character itself
end
|
||
#=#!
@brief Print a UnicodeError: "<<1>>" in the message becomes the error position, "<<2>>" the character in hexadecimal
=#
function show(io::IO, exc::UnicodeError)
    # Fill in the position first, then the hexadecimal character value
    with_position = replace(exc.errmsg, "<<1>>", string(exc.errpos))
    filled = replace(with_position, "<<2>>", hex(exc.errchr))
    print(io, filled)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# This file is a part of Julia. License is MIT: http://julialang.org/license | ||
|
||
#=#!
@brief UTF-16 string type; data is kept in native byte order and ends with a 16-bit NULL termination word
@throws UnicodeError when the data is empty or its last word is not zero
=#
immutable UTF16String <: AbstractString
    data::Vector{UInt16} # string code units followed by the 16-bit NULL terminator

    function UTF16String(data::Vector{UInt16})
        # The vector must hold at least the terminator, and it must come last
        terminated = !isempty(data) && data[end] == 0
        terminated || throw(UnicodeError(UTF_ERR_NULL_16_TERMINATE, 0, 0))
        new(data)
    end
end
|
||
#=#!
@brief UTF-32 string type; data is kept in native byte order and ends with a 32-bit NULL termination word
@throws UnicodeError when the data is empty or its last element is not Char(0)
=#
immutable UTF32String <: DirectIndexString
    data::Vector{Char} # string characters followed by the 32-bit NULL terminator

    function UTF32String(data::Vector{Char})
        # The vector must hold at least the terminator, and it must come last
        terminated = !isempty(data) && data[end] == Char(0)
        terminated || throw(UnicodeError(UTF_ERR_NULL_32_TERMINATE, 0, 0))
        new(data)
    end
end
# Convenience constructor: reinterpret raw 32-bit words as Char and delegate
# to the Vector{Char} constructor, which checks the NULL terminator.
function UTF32String(data::Vector{UInt32})
    chars = reinterpret(Char, data)
    UTF32String(chars)
end
|
||
# Shared empty strings: the data consists of the NULL terminator only
const empty_utf16 = UTF16String(zeros(UInt16, 1))
const empty_utf32 = UTF32String(zeros(UInt32, 1))
|
||
# Validity of a string of one of the built-in encodings is decided by
# delegating to isvalid(T, data) on the string's underlying data vector.
function isvalid{T<:Union(ASCIIString,UTF8String,UTF16String,UTF32String)}(str::T)
    isvalid(T, str.data)
end

# Same check with the string type passed explicitly as the first argument
function isvalid{T<:Union(ASCIIString,UTF8String,UTF16String,UTF32String)}(::Type{T}, str::T)
    isvalid(T, str.data)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters