diff --git a/Project.toml b/Project.toml index 4d99381..047e6ac 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "BibInternal" uuid = "2027ae74-3657-4b95-ae00-e2f7d55c3e64" authors = ["azzaare"] -version = "0.3.4" +version = "0.3.5" [compat] julia = "1.6" diff --git a/src/entry.jl b/src/entry.jl index 9ba13a4..f650415 100644 --- a/src/entry.jl +++ b/src/entry.jl @@ -16,6 +16,7 @@ struct Name first::String middle::String end + const Names = Vector{Name} """ @@ -25,11 +26,6 @@ Decompose without ambiguities a name as `particle` (optional) `last`, `junior` ( function Name(str) @assert !isempty(strip(str)) "Name must not be empty or consist of only whitespace" - # split along commas, then along spaces - subnames = map(split(str, ","; keepempty=false)) do aux - return split(aux, r"[\n\r ]+"; keepempty=false) - end - # subnames containers first = "" middle = "" @@ -37,68 +33,79 @@ function Name(str) last = "" junior = "" - # mark for string parsing - mark_in = 1 - mark_out = 0 + if (str[1], str[end]) == ('{', '}') + last = str + else - # BibTeX form 1: First Second von Last - if length(subnames) == 1 - aux = subnames[1] - mark_out = length(aux) - 1 - last = aux[end] - if length(aux) > 1 && isuppercase(aux[1][1]) - first = aux[1] - mark_in += 1 - for s in aux[2:(end - 1)] + # split along commas, then along spaces + subnames = map(split(str, ","; keepempty=false)) do aux + return split(aux, r"[\n\r ]+"; keepempty=false) + end + + # mark for string parsing + mark_in = 1 + mark_out = 0 + + # BibTeX form 1: First Second von Last + if length(subnames) == 1 + aux = subnames[1] + mark_out = length(aux) - 1 + last = aux[end] + if length(aux) > 1 && isuppercase(aux[1][1]) + first = aux[1] mark_in += 1 + for s in aux[2:(end-1)] + mark_in += 1 + islowercase(s[1]) && break + middle *= " $s" + end + for s in reverse(aux[mark_in:mark_out]) + islowercase(s[1]) && break + mark_out -= 1 + last = "$s " * last + end + particle = join(aux[mark_in:mark_out], " ") + end + + # BibTeX form 2: von Last, First Second + elseif length(subnames) == 2 + aux = subnames[1] # von Last + mark_out = length(aux) - 1 + last = aux[end] + for s in reverse(aux[1:mark_out]) islowercase(s[1]) && break - middle *= " $s" + mark_out -= 1 + last = "$s " * last end - for s in reverse(aux[mark_in:mark_out]) + particle = join(aux[1:mark_out], " ") + aux = subnames[end] # First Second + first = aux[1] + middle = join(aux[2:end], " ") + + # BibTeX form 3: von Last, Junior, First Second + elseif length(subnames) == 3 + aux = subnames[1] # von Last + mark_out = length(aux) - 1 + last = aux[end] + for s in reverse(aux[1:mark_out]) islowercase(s[1]) && break mark_out -= 1 last = "$s " * last end - particle = join(aux[mark_in:mark_out], " ") - end + particle = join(aux[1:mark_out], " ") + aux = subnames[2] + @assert length(aux) == 1 "malformed junior subname" + junior = aux[1] + aux = subnames[end] # First Second + first = aux[1] + middle = join(aux[2:end], " ") - # BibTeX form 2: von Last, First Second - elseif length(subnames) == 2 - aux = subnames[1] # von Last - mark_out = length(aux) - 1 - last = aux[end] - for s in reverse(aux[1:mark_out]) - islowercase(s[1]) && break - mark_out -= 1 - last = "$s " * last - end - particle = join(aux[1:mark_out], " ") - aux = subnames[end] # First Second - first = aux[1] - middle = join(aux[2:end], " ") - - # BibTeX form 3: von Last, Junior, First Second - elseif length(subnames) == 3 - aux = subnames[1] # von Last - mark_out = length(aux) - 1 - last = aux[end] - for s in reverse(aux[1:mark_out]) - islowercase(s[1]) && break - mark_out -= 1 - last = "$s " * last + else + # TODO: become more strict here in the future? but beware, right + # now this function sometimes gets called with an empty string + #error("too many commas in name '$(str)'") end - particle = join(aux[1:mark_out], " ") - aux = subnames[2] - @assert length(aux) == 1 "malformed junior subname" - junior = aux[1] - aux = subnames[end] # First Second - first = aux[1] - middle = join(aux[2:end], " ") - else - # TODO: become more strict here in the future? but beware, right - # now this function sometimes gets called with an empty string - #error("too many commas in name '$(str)'") end return Name(particle, last, junior, first, middle)