Skip to content

Commit

Permalink
AnnotatedStrings, and a string styling stdlib (#49586)
Browse files Browse the repository at this point in the history
  • Loading branch information
LilithHafner authored Oct 20, 2023
2 parents 01f6c4c + e5cd9b6 commit abe4303
Show file tree
Hide file tree
Showing 21 changed files with 705 additions and 31 deletions.
16 changes: 16 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ New language features
difference between `public` and `export` is that `public` names do not become
available when `using` a package/module. ([#50105])
* `ScopedValue` implements dynamic scope with inheritance across tasks ([#50958]).
* A new `AbstractString` type, `AnnotatedString`, is introduced that allows for
regional annotations to be attached to an underlying string. This type is
particularly useful for holding styling information, and is used extensively
in the new `StyledStrings` standard library. There is also a new `AnnotatedChar`
type, which is the equivalent new `AbstractChar` type.

Language changes
----------------
Expand Down Expand Up @@ -51,6 +56,17 @@ New library features
Standard library changes
------------------------

#### StyledStrings

* A new standard library for handling styling in a more comprehensive and structured way.
* The new `Faces` struct serves as a container for text styling information
(think typeface, as well as color and decoration), and comes with a framework
to provide a convenient, extensible (via `addface!`), and customisable (with a
user's `Faces.toml` and `loadfaces!`) approach to
styled content.
* The new `@styled_str` string macro provides a convenient way of creating an
`AnnotatedString` with various faces or other attributes applied.

#### Package Manager

#### LinearAlgebra
Expand Down
6 changes: 4 additions & 2 deletions base/client.jl
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,10 @@ function exec_options(opts)
interactiveinput = (repl || is_interactive::Bool) && isa(stdin, TTY)
is_interactive::Bool |= interactiveinput

# load terminfo in for styled printing
term_env = get(ENV, "TERM", @static Sys.iswindows() ? "" : "dumb")
global current_terminfo = load_terminfo(term_env)

# load ~/.julia/config/startup.jl file
if startup
try
Expand Down Expand Up @@ -416,11 +420,9 @@ function run_main_repl(interactive::Bool, quiet::Bool, banner::Symbol, history_f
end
end
# TODO cleanup REPL_MODULE_REF

if !fallback_repl && interactive && isassigned(REPL_MODULE_REF)
invokelatest(REPL_MODULE_REF[]) do REPL
term_env = get(ENV, "TERM", @static Sys.iswindows() ? "" : "dumb")
global current_terminfo = load_terminfo(term_env)
term = REPL.Terminals.TTYTerminal(term_env, stdin, stdout, stderr)
banner == :no || Base.banner(term, short=banner==:short)
if term.term_type == "dumb"
Expand Down
7 changes: 7 additions & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1089,8 +1089,15 @@ public
Generator,
ImmutableDict,
OneTo,
AnnotatedString,
AnnotatedChar,
UUID,

# Annotated strings
annotatedstring,
annotate!,
annotations,

# Semaphores
Semaphore,
acquire,
Expand Down
55 changes: 43 additions & 12 deletions base/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -212,14 +212,18 @@ julia> hr
"11"
```
"""
struct RegexMatch <: AbstractMatch
match::SubString{String}
captures::Vector{Union{Nothing,SubString{String}}}
struct RegexMatch{S<:AbstractString} <: AbstractMatch
# `S` is the parent string type of the matched and captured substrings.
match::SubString{S}
# The element type allows `nothing` entries alongside substrings.
captures::Vector{Union{Nothing,SubString{S}}}
offset::Int
offsets::Vector{Int}
regex::Regex
end

# Outer constructor: take the type parameter `S` from the parent string type of
# `match` and `captures`, then forward every argument to `RegexMatch{S}`.
function RegexMatch(match::SubString{S}, captures::Vector{Union{Nothing,SubString{S}}},
                    offset::Union{Int, UInt}, offsets::Vector{Int},
                    regex::Regex) where {S<:AbstractString}
    return RegexMatch{S}(match, captures, offset, offsets, regex)
end

"""
keys(m::RegexMatch) -> Vector
Expand Down Expand Up @@ -423,9 +427,35 @@ function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer,
return result
end

# Lift a `RegexMatch` over a string of type `S` to one over the annotated string
# `str`: the overall match and every non-`nothing` capture are rebuilt as
# `SubString{AnnotatedString{S}}` values with the same offset and code unit count,
# while `offset`, `offsets` and `regex` are carried over unchanged.
function _annotatedmatch(m::RegexMatch{S}, str::AnnotatedString{S}) where {S<:AbstractString}
    A = AnnotatedString{S}
    # Rebuild one substring on top of `str`, reusing its existing offset and length.
    lift(sub) = (@inbounds SubString{A}(str, sub.offset, sub.ncodeunits, Val(:noshift)))
    whole = lift(m.match)
    caps = Union{Nothing,SubString{A}}[
        isnothing(cap) ? nothing : lift(cap) for cap in m.captures]
    return RegexMatch{A}(whole, caps, m.offset, m.offsets, m.regex)
end

# Match against the string wrapped by `str`, then re-express any result over `str`
# itself. Returns `nothing` when there is no match.
function match(re::Regex, str::AnnotatedString)
    plain = match(re, str.string)
    isnothing(plain) && return nothing
    return _annotatedmatch(plain, str)
end

# Same as the two-argument annotated method, but forwards the start index `idx`
# and the extra options `add_opts` to the match on the wrapped string.
# Returns `nothing` when there is no match.
function match(re::Regex, str::AnnotatedString, idx::Integer, add_opts::UInt32=UInt32(0))
    plain = match(re, str.string, idx, add_opts)
    isnothing(plain) && return nothing
    return _annotatedmatch(plain, str)
end

# Without an explicit index, start matching at the first index of `s`.
function match(r::Regex, s::AbstractString)
    return match(r, s, firstindex(s))
end
# Generic `AbstractString` method: unconditionally throws an `ArgumentError`
# telling the caller to convert with `String(s)`.
match(r::Regex, s::AbstractString, i::Integer) = throw(ArgumentError(
"regex matching is only available for the String type; use String(s) to convert"
"regex matching is only available for the String and AnnotatedString types; use String(s) to convert"
))

# Regex `findnext` for `String`/`SubString` inputs; delegates to `_findnext_re`,
# passing `C_NULL` as the final argument.
function findnext(re::Regex, str::Union{String,SubString}, idx::Integer)
    return _findnext_re(re, str, idx, C_NULL)
end
Expand Down Expand Up @@ -671,18 +701,19 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
end
end

struct RegexMatchIterator
struct RegexMatchIterator{S <: AbstractString}
regex::Regex
string::String
string::S
overlap::Bool

function RegexMatchIterator(regex::Regex, string::AbstractString, ovr::Bool=false)
new(regex, string, ovr)
end
# Generic strings are converted to `String`, giving a `RegexMatchIterator{String}`.
RegexMatchIterator(regex::Regex, string::AbstractString, ovr::Bool=false) =
new{String}(regex, String(string), ovr)
# Annotated strings keep their annotations; only the wrapped string is converted
# to `String`, giving a `RegexMatchIterator{AnnotatedString{String}}`.
RegexMatchIterator(regex::Regex, string::AnnotatedString, ovr::Bool=false) =
new{AnnotatedString{String}}(regex, AnnotatedString(String(string.string), string.annotations), ovr)
end
# Compile the iterator's regex and hand the iterator itself back.
function compile(itr::RegexMatchIterator)
    compile(itr.regex)
    return itr
end
# Iteration traits: elements are reported as `RegexMatch`, and the number of
# matches is declared unknown (`SizeUnknown`).
eltype(::Type{RegexMatchIterator}) = RegexMatch
IteratorSize(::Type{RegexMatchIterator}) = SizeUnknown()
eltype(::Type{<:RegexMatchIterator}) = RegexMatch
IteratorSize(::Type{<:RegexMatchIterator}) = SizeUnknown()

function iterate(itr::RegexMatchIterator, (offset,prevempty)=(1,false))
opts_nonempty = UInt32(PCRE.ANCHORED | PCRE.NOTEMPTY_ATSTART)
Expand Down Expand Up @@ -727,7 +758,7 @@ julia> rx = r"a.a"
r"a.a"
julia> m = eachmatch(rx, "a1a2a3a")
Base.RegexMatchIterator(r"a.a", "a1a2a3a", false)
Base.RegexMatchIterator{String}(r"a.a", "a1a2a3a", false)
julia> collect(m)
2-element Vector{RegexMatch}:
Expand Down
Loading

3 comments on commit abe4303

@vtjnash
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nanosoldier runbenchmarks(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your benchmark job has completed - possible performance regressions were detected. A full report can be found here.

@vtjnash
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dates printing seemed to get unexpectedly slower. Not sure why. Nothing else seems impacted

Please sign in to comment.