Skip to content

Commit

Permalink
Fix entity name (#21)
Browse files Browse the repository at this point in the history
* Fix the BibTeX import of an entity name (between braces)

* New version
  • Loading branch information
Azzaare authored Nov 18, 2022
1 parent 083f10d commit 3c39efa
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 57 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "BibInternal"
uuid = "2027ae74-3657-4b95-ae00-e2f7d55c3e64"
authors = ["azzaare"]
version = "0.3.4"
version = "0.3.5"

[compat]
julia = "1.6"
119 changes: 63 additions & 56 deletions src/entry.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ struct Name
first::String
middle::String
end

const Names = Vector{Name}

"""
Expand All @@ -25,80 +26,86 @@ Decompose without ambiguities a name as `particle` (optional) `last`, `junior` (
function Name(str)
@assert !isempty(strip(str)) "Name must not be empty or consist of only whitespace"

# split along commas, then along spaces
subnames = map(split(str, ","; keepempty=false)) do aux
return split(aux, r"[\n\r ]+"; keepempty=false)
end

# subnames containers
first = ""
middle = ""
particle = ""
last = ""
junior = ""

# mark for string parsing
mark_in = 1
mark_out = 0
if (str[1], str[end]) == ('{', '}')
last = str
else

# BibTeX form 1: First Second von Last
if length(subnames) == 1
aux = subnames[1]
mark_out = length(aux) - 1
last = aux[end]
if length(aux) > 1 && isuppercase(aux[1][1])
first = aux[1]
mark_in += 1
for s in aux[2:(end - 1)]
# split along commas, then along spaces
subnames = map(split(str, ","; keepempty=false)) do aux
return split(aux, r"[\n\r ]+"; keepempty=false)
end

# mark for string parsing
mark_in = 1
mark_out = 0

# BibTeX form 1: First Second von Last
if length(subnames) == 1
aux = subnames[1]
mark_out = length(aux) - 1
last = aux[end]
if length(aux) > 1 && isuppercase(aux[1][1])
first = aux[1]
mark_in += 1
for s in aux[2:(end-1)]
mark_in += 1
islowercase(s[1]) && break
middle *= " $s"
end
for s in reverse(aux[mark_in:mark_out])
islowercase(s[1]) && break
mark_out -= 1
last = "$s " * last
end
particle = join(aux[mark_in:mark_out], " ")
end

# BibTeX form 2: von Last, First Second
elseif length(subnames) == 2
aux = subnames[1] # von Last
mark_out = length(aux) - 1
last = aux[end]
for s in reverse(aux[1:mark_out])
islowercase(s[1]) && break
middle *= " $s"
mark_out -= 1
last = "$s " * last
end
for s in reverse(aux[mark_in:mark_out])
particle = join(aux[1:mark_out], " ")
aux = subnames[end] # First Second
first = aux[1]
middle = join(aux[2:end], " ")

# BibTeX form 3: von Last, Junior, First Second
elseif length(subnames) == 3
aux = subnames[1] # von Last
mark_out = length(aux) - 1
last = aux[end]
for s in reverse(aux[1:mark_out])
islowercase(s[1]) && break
mark_out -= 1
last = "$s " * last
end
particle = join(aux[mark_in:mark_out], " ")
end
particle = join(aux[1:mark_out], " ")
aux = subnames[2]
@assert length(aux) == 1 "malformed junior subname"
junior = aux[1]
aux = subnames[end] # First Second
first = aux[1]
middle = join(aux[2:end], " ")

# BibTeX form 2: von Last, First Second
elseif length(subnames) == 2
aux = subnames[1] # von Last
mark_out = length(aux) - 1
last = aux[end]
for s in reverse(aux[1:mark_out])
islowercase(s[1]) && break
mark_out -= 1
last = "$s " * last
end
particle = join(aux[1:mark_out], " ")
aux = subnames[end] # First Second
first = aux[1]
middle = join(aux[2:end], " ")

# BibTeX form 3: von Last, Junior, First Second
elseif length(subnames) == 3
aux = subnames[1] # von Last
mark_out = length(aux) - 1
last = aux[end]
for s in reverse(aux[1:mark_out])
islowercase(s[1]) && break
mark_out -= 1
last = "$s " * last
else
# TODO: become more strict here in the future? but beware, right
# now this function sometimes gets called with an empty string
#error("too many commas in name '$(str)'")
end
particle = join(aux[1:mark_out], " ")
aux = subnames[2]
@assert length(aux) == 1 "malformed junior subname"
junior = aux[1]
aux = subnames[end] # First Second
first = aux[1]
middle = join(aux[2:end], " ")

else
# TODO: become more strict here in the future? but beware, right
# now this function sometimes gets called with an empty string
#error("too many commas in name '$(str)'")
end

return Name(particle, last, junior, first, middle)
Expand Down

0 comments on commit 3c39efa

Please sign in to comment.