From df08e124cea4f3e42b137a58ea5db7a71154fdd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Thu, 7 Oct 2021 11:26:11 +0200 Subject: [PATCH 1/2] Simplify loop to find out segments to be created Doing it this way is simpler and it doesn't end up adding "/" to the list of folders, so it doesn't need to be removed later. --- lib/fileutils.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/fileutils.rb b/lib/fileutils.rb index 69de458..7dc46d9 100644 --- a/lib/fileutils.rb +++ b/lib/fileutils.rb @@ -220,12 +220,10 @@ def mkdir_p(list, mode: nil, noop: nil, verbose: nil) end stack = [] - until path == stack.last # dirname("/")=="/", dirname("C:/")=="C:/" + until File.directory?(path) stack.push path path = File.dirname(path) - break if File.directory?(path) end - stack.pop if path == stack.last # root directory should exist stack.reverse_each do |dir| begin fu_mkdir dir, mode From e842a0e70e3267ef5c4f0017d6dbf21b857370e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Rodr=C3=ADguez?= Date: Thu, 7 Oct 2021 11:27:21 +0200 Subject: [PATCH 2/2] Remove counterproductive optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I think it's debatable which is the most common usage of `FileUtils.mkdir_p`, but even assuming the most common use case is creating a folder when it doesn't previously exist but the parent does, this optimization doesn't seem to have a noticiable effect there while harming other use cases. For benchmarks, I created this script ```ruby require "benchmark/ips" Benchmark.ips do |x| x.report("old mkdir_p - exists") do FileUtils.mkdir_p "/tmp" end x.report("new_mkdir_p - exists") do FileUtils.mkdir_p_new "/tmp" end x.compare! end FileUtils.rm_rf "/tmp/foo" Benchmark.ips do |x| x.report("old mkdir_p - doesnt exist, parent exists") do FileUtils.mkdir_p "/tmp/foo" FileUtils.rm_rf "/tmp/foo" end x.report("new_mkdir_p - doesnt exist, parent exists") do FileUtils.mkdir_p_new "/tmp/foo" FileUtils.rm_rf "/tmp/foo" end x.compare! end Benchmark.ips do |x| x.report("old mkdir_p - doesnt exist, parent either") do FileUtils.mkdir_p "/tmp/foo/bar" FileUtils.rm_rf "/tmp/foo" end x.report("new_mkdir_p - doesnt exist, parent either") do FileUtils.mkdir_p_new "/tmp/foo/bar" FileUtils.rm_rf "/tmp/foo" end x.compare! end Benchmark.ips do |x| x.report("old mkdir_p - more levels") do FileUtils.mkdir_p "/tmp/foo/bar/baz" FileUtils.rm_rf "/tmp/foo" end x.report("new_mkdir_p - more levels") do FileUtils.mkdir_p_new "/tmp/foo/bar/baz" FileUtils.rm_rf "/tmp/foo" end x.compare! end ``` and copied the method with the "optimization" removed as `FileUtils.mkdir_p_new`. The results are as below: ``` Warming up -------------------------------------- old mkdir_p - exists 15.914k i/100ms new_mkdir_p - exists 46.512k i/100ms Calculating ------------------------------------- old mkdir_p - exists 161.461k (± 3.2%) i/s - 811.614k in 5.032315s new_mkdir_p - exists 468.192k (± 2.9%) i/s - 2.372M in 5.071225s Comparison: new_mkdir_p - exists: 468192.1 i/s old mkdir_p - exists: 161461.0 i/s - 2.90x (± 0.00) slower Warming up -------------------------------------- old mkdir_p - doesnt exist, parent exists 2.142k i/100ms new_mkdir_p - doesnt exist, parent exists 1.961k i/100ms Calculating ------------------------------------- old mkdir_p - doesnt exist, parent exists 21.242k (± 6.7%) i/s - 107.100k in 5.069206s new_mkdir_p - doesnt exist, parent exists 19.682k (± 4.2%) i/s - 100.011k in 5.091961s Comparison: old mkdir_p - doesnt exist, parent exists: 21241.7 i/s new_mkdir_p - doesnt exist, parent exists: 19681.7 i/s - same-ish: difference falls within error Warming up -------------------------------------- old mkdir_p - doesnt exist, parent either 945.000 i/100ms new_mkdir_p - doesnt exist, parent either 1.002k i/100ms Calculating ------------------------------------- old mkdir_p - doesnt exist, parent either 9.689k (± 4.4%) i/s - 49.140k in 5.084342s new_mkdir_p - doesnt exist, parent either 10.806k (± 4.6%) i/s - 54.108k in 5.020714s Comparison: new_mkdir_p - doesnt exist, parent either: 10806.3 i/s old mkdir_p - doesnt exist, parent either: 9689.3 i/s - 1.12x (± 0.00) slower Warming up -------------------------------------- old mkdir_p - more levels 702.000 i/100ms new_mkdir_p - more levels 775.000 i/100ms Calculating ------------------------------------- old mkdir_p - more levels 7.046k (± 3.5%) i/s - 35.802k in 5.087548s new_mkdir_p - more levels 7.685k (± 5.5%) i/s - 38.750k in 5.061351s Comparison: new_mkdir_p - more levels: 7685.1 i/s old mkdir_p - more levels: 7046.4 i/s - same-ish: difference falls within error ``` I think it's better to keep the code simpler is the optimization is not so clear like in this case. --- lib/fileutils.rb | 8 -------- 1 file changed, 8 deletions(-) diff --git a/lib/fileutils.rb b/lib/fileutils.rb index 7dc46d9..3333307 100644 --- a/lib/fileutils.rb +++ b/lib/fileutils.rb @@ -211,14 +211,6 @@ def mkdir_p(list, mode: nil, noop: nil, verbose: nil) list.each do |item| path = remove_trailing_slash(item) - # optimize for the most common case - begin - fu_mkdir path, mode - next - rescue SystemCallError - next if File.directory?(path) - end - stack = [] until File.directory?(path) stack.push path