Skip to content

Commit

Permalink
Improve performance of Path#dirname and Path#extension (#11001)
Browse files Browse the repository at this point in the history
  • Loading branch information
BlobCodes authored Sep 30, 2021
1 parent 4264636 commit f90d914
Showing 1 changed file with 48 additions and 66 deletions.
114 changes: 48 additions & 66 deletions src/path.cr
Original file line number Diff line number Diff line change
Expand Up @@ -192,40 +192,40 @@ struct Path
# Path["/foo/bar/file.cr"].dirname # => "/foo/bar"
# ```
def dirname : String
reader = Char::Reader.new(at_end: @name)
separators = self.separators
# skip trailing separators
while separators.includes?(reader.current_char) && reader.pos > 0
reader.previous_char
end

# skip last component
while !separators.includes?(reader.current_char) && reader.pos > 0
reader.previous_char
end

if reader.pos == 0 && !separators.includes?(reader.current_char)
if windows? && windows_drive?
return anchor.to_s
else
return "."
return "." if @name.empty?
slice = @name.to_slice
sep = self.separators.map &.ord
pos = slice.size - 1
stage = 0

slice.reverse_each do |byte|
is_separator = byte.in? sep
# The stages are ordered like this to improve performance
# Trailing separators are possible but unlikely (stage 0)
# There will probably only be one separator between filename and dirname (stage 2)
# There will probably be multiple characters in the filename which need to be skipped (stage 1)
case stage
when 1 # Wait until separator
stage += 1 if is_separator
when 2 # Remove trailing separators
break unless is_separator
when 0 # Wait until past trailing separators
stage += 1 unless is_separator
end
pos -= 1
end

# strip trailing separators
while separators.includes?(reader.current_char) && reader.pos > 0
reader.previous_char
end

if reader.pos == 0
return reader.current_char.to_s
end

if windows? && reader.current_char == ':' && reader.pos == 1 && (anchor = self.anchor)
return anchor.to_s
case stage
when 0 # Path only consists of separators
String.new(slice[0, 1])
when 1 # Path has no parent (ex. "hello/", "C:/", "crystal")
return anchor.to_s if windows? && windows_drive?
"."
else # Path has a parent (ex. "a/a", "/home/user//", "C://Users/mmm")
return String.new(slice[0, 1]) if pos == -1
return anchor.to_s if windows? && pos == 1 && slice.unsafe_fetch(pos) === ':' && (anchor = self.anchor)
String.new(slice[0, pos + 1])
end

@name.byte_slice(0, reader.pos + reader.current_char_width)
end

# Returns the parent path of this path.
Expand Down Expand Up @@ -351,49 +351,31 @@ struct Path
# Path["foo.tar.gz"].extension # => ".gz"
# ```
def extension : String
return "" if @name.bytesize < 3
bytes = @name.to_slice

return "" if bytes.empty?

slice_end = bytes.size
current = slice_end - 1

separators = self.separators.map &.ord

# ignore trailing separators
while separators.includes?(bytes[current]) && current > 0
current -= 1
slice_end -= 1
# Ignore trailing separators
offset = bytes.size - 1
while bytes.unsafe_fetch(offset).in? separators
return "" if offset == 0
offset -= 1
end

# if the pattern is `foo.`, it has no extension
return "" if bytes[current] == '.'.ord

# position the reader at the last `.` or SEPARATOR
# that is not the first char
while !separators.includes?(bytes[current]) &&
bytes[current] != '.'.ord &&
current > 0
current -= 1
end

# if we are the beginning of the string there is no extension
# `/foo` and `.foo` have no extension
return "" unless current > 0

# otherwise we are not at the beginning, and there is a previous char.
# if current is '/', then the pattern is prefix/foo and has no extension
return "" if separators.includes?(bytes[current])
# Get the first occurrence of a separator or a '.' past the trailing separators
dot_index = bytes.rindex(offset: offset) { |byte| byte === '.' || byte.in? separators }

# otherwise the current_char is '.'
# if previous is '/', then the pattern is `prefix/.foo` and has no extension
return "" if separators.includes?(bytes[current - 1])
# Return "" if '.' is the first character (ex. ".dotfile"),
# or if the '.' character follows after a separator (ex. "pathto/.dotfile")
# or if the character at the returned index is a separator (ex. "no/extension")
# or if the filename ends with a '.'
return "" unless dot_index
return "" if dot_index == 0
return "" if dot_index == offset
return "" if bytes.unsafe_fetch(dot_index - 1).in?(separators)
return "" if bytes.unsafe_fetch(dot_index).in?(separators)

# So the current char is '.',
# we are not at the beginning,
# the previous char is not a '/',
# and we have an extension
String.new(bytes[current, slice_end - current])
String.new(bytes[dot_index, offset - dot_index + 1])
end

# Returns the last component of this path without the extension.
Expand Down

0 comments on commit f90d914

Please sign in to comment.