demarque · baptistejub · May 16, 2018 · Oct 29, 2020 · Jan 28, 2021 · Feb 4, 2021
diff --git a/README.md b/README.md
@@ -26,18 +26,66 @@ creek = Creek::Book.new 'spec/fixtures/sample.xlsx'
 sheet = creek.sheets[0]
 
 sheet.rows.each do |row|
-  puts row # => {"A1"=>"Content 1", "B1"=>nil, C1"=>nil, "D1"=>"Content 3"}
+  puts row # => ["Content 1", nil, nil, "Content 3"]
+end
+
+sheet.rows(headers: true).each do |row|
+  puts row # => { 'header1' => "Content 1", 'header2' => nil, 'header3' => nil, 'header4' => "Content 3" }
 end
 
 sheet.rows_with_meta_data.each do |row|
-  puts row # => {"collapsed"=>"false", "customFormat"=>"false", "customHeight"=>"true", "hidden"=>"false", "ht"=>"12.1", "outlineLevel"=>"0", "r"=>"1", "cells"=>{"A1"=>"Content 1", "B1"=>nil, C1"=>nil, "D1"=>"Content 3"}}
+  puts row # => {"collapsed"=>"false", "customFormat"=>"false", "customHeight"=>"true", "hidden"=>"false", "ht"=>"12.1", "outlineLevel"=>"0", "r"=>"1", "cells"=>["Content 1", nil, nil, "Content 3"]}
+end
+
+sheet.rows_with_meta_data(headers: true).each do |row|
+  puts row # => {"collapsed"=>"false", "customFormat"=>"false", "customHeight"=>"true", "hidden"=>"false", "ht"=>"12.1", "outlineLevel"=>"0", "r"=>"2", "header_row" => false, "cells"=>{ 'header1' => "Content 1", 'header2' => nil, 'header3' => nil, 'header4' => "Content 3" }}
 end
 
 sheet.state   # => 'visible'
 sheet.name    # => 'Sheet1'
 sheet.rid     # => 'rId2'
 ```
 
+## Headers
+`rows` and `rows_with_meta_data` both accept the kwargs `headers` and `header_row_number` to load
+each row as a hash with the headers as keys. When `headers` is enabled, a `header_row` boolean is also added to the row metadata.
+See examples above.
+
+Headers are loaded once, as an array, by parsing the file a first time until the row given by `header_row_number` is reached.
+Rows are then returned as an Enumerator as usual (a new Enumerator instance that starts from the beginning of the file, so it includes the header row as well). It's the caller's responsibility to filter out the header row as needed.
+
+`extract_headers` can also be called manually from the sheet instance.
+Once extracted, the headers can be accessed through the `headers` attr_reader.
+As headers are matched to their respective value in the row by index, it's possible to modify the array in `headers` to customize the headers (to fix typos, make them unique, etc.).
+
+```ruby
+creek = Creek::Book.new 'spec/fixtures/sample.xlsx'
+sheet = creek.sheets[0]
+
+# Parse the file up to row 3 (file starts at row 1)
+sheet.extract_headers(3)
+# => ['Header1', 'Other Header', 'More header']
+
+sheet.headers
+# => ['Header1', 'Other Header', 'More header']
+
+# Headers can be modified before parsing the file to customize them
+sheet.headers[0] = 'A better Header'
+sheet.headers
+# => ['A better Header', 'Other Header', 'More header']
+
+# Parse the rows as hashes, including the (modified) headers
+sheet.rows(headers: true).each do |row|
+  puts row # => { 'A better Header' => "Content 1", 'Other Header' => nil, 'More header' => nil }
+end
+
+# Or both can be done directly when accessing rows
+sheet2 = creek.sheets[1]
+sheet2.rows(headers: true, header_row_number: 3).each do |row|
+  puts row # => { 'Header1' => "Content 2", 'Other Header' => nil, 'More header' => nil }
+end
+```
+
 ## Filename considerations
 By default, Creek will ensure that the file extension is either *.xlsx or *.xlsm, but this check can be circumvented as needed:
 
@@ -82,13 +130,6 @@ puts sheet.images_at('C1') # => nil
 
 Creek will most likely return nil for a cell with images if there is no other text cell in that row - you can use *images_at* method for retrieving images in that cell.
 
-## Remote files
-
-```ruby
-remote_url = 'http://dev-builds.libreoffice.org/tmp/test.xlsx'
-Creek::Book.new remote_url, remote: true
-```
-
 ## Contributing
 
 Contributions are welcomed. You can fork a repository, add your code changes to the forked branch, ensure all existing unit tests pass, create new unit tests which cover your new changes and finally create a pull request.

diff --git a/creek.gemspec b/creek.gemspec
@@ -28,5 +28,4 @@ Gem::Specification.new do |spec|
 
   spec.add_dependency 'nokogiri', '>= 1.7.0'
   spec.add_dependency 'rubyzip', '>= 1.0.0'
-  spec.add_dependency 'httparty', '~> 0.15.5'
 end
diff --git a/lib/creek/book.rb b/lib/creek/book.rb
@@ -1,7 +1,6 @@
 require 'zip/filesystem'
 require 'nokogiri'
 require 'date'
-require 'httparty'
 
 module Creek
 
@@ -20,13 +19,7 @@ def initialize path, options = {}
         extension = File.extname(options[:original_filename] || path).downcase
         raise 'Not a valid file format.' unless (['.xlsx', '.xlsm'].include? extension)
       end
-      if options[:remote]
-        zipfile = Tempfile.new("file")
-        zipfile.binmode
-        zipfile.write(HTTParty.get(path).body)
-        zipfile.close
-        path = zipfile.path
-      end
+
       @files = Zip::File.open(path)
       @shared_strings = SharedStrings.new(self)
     end

diff --git a/lib/creek/sheet.rb b/lib/creek/sheet.rb
@@ -11,10 +11,17 @@ class Creek::Sheet
                 :state,
                 :visible,
                 :rid,
-                :index
-
+                :index,
+                :headers
+
+    # An XLS file has only 256 columns, however, an XLSX or XLSM file can contain up to 16384 columns.
+    # This function creates a hash with all valid XLSX column names and associated indices.
+    # Note: load and memoize on demand
+    def self.column_indexes
+      @column_indexes ||= ('A'..'XFD').each_with_index.to_h.freeze
+    end
 
-    def initialize book, name, sheetid, state, visible, rid, sheetfile
+    def initialize(book, name, sheetid, state, visible, rid, sheetfile)
       @book = book
       @name = name
       @sheetid = sheetid
@@ -23,13 +30,10 @@ def initialize book, name, sheetid, state, visible, rid, sheetfile
       @state = state
       @sheetfile = sheetfile
       @images_present = false
+    end
 
-      # An XLS file has only 256 columns, however, an XLSX or XLSM file can contain up to 16384 columns.
-      # This function creates a hash with all valid XLSX column names and associated indices.
-      @excel_col_names = Hash.new
-      ('A'..'XFD').each_with_index do |col_name, index|
-        @excel_col_names[col_name] = index
-      end
+    def column_indexes
+      self.class.column_indexes
     end
 
     ##
@@ -56,15 +60,30 @@ def images_at(cell)
     ##
     # Provides an Enumerator that returns a hash representing each row.
     # The key of the hash is the Cell id and the value is the value of the cell.
-    def rows
-      rows_generator
+    def rows(headers: false, header_row_number: 1, metadata: false)
+      extract_headers(header_row_number) if headers
+
+      rows_generator(include_headers: headers, include_meta_data: metadata)
     end
 
     ##
     # Provides an Enumerator that returns a hash representing each row.
     # The hash contains meta data of the row and a 'cells' embended hash which contains the cell contents.
-    def rows_with_meta_data
-      rows_generator true
+    def rows_with_meta_data(headers: false, header_row_number: 1)
+      rows(headers: headers, header_row_number: header_row_number, metadata: true)
+    end
+
+    # Parses the file until the header row is reached.
+    # Returns the headers as an array.
+    def extract_headers(row_number = 1)
+      return @headers if defined?(@headers)
+
+      # Extracted row numbers are Strings, so convert it here to facilitate comparison
+      @header_row_number = row_number.to_s
+
+      rows_with_meta_data.each do |row|
+        return (@headers = row['cells'].any? && row['cells']) if @header_row_number == row['r']
+      end
     end
 
     private
@@ -79,52 +98,65 @@ def rows_with_meta_data
     TEXT = 't'.freeze
 
     ##
-    # Returns a hash per row that includes the cell ids and values.
-    # Empty cells will be also included in the hash with a nil value.
-    def rows_generator include_meta_data=false
-      path = if @sheetfile.start_with? "/xl/" or @sheetfile.start_with? "xl/" then @sheetfile else "xl/#{@sheetfile}" end
+    # Returns an array or hash (with headers as key) per row that includes the cell ids and values.
+    # Empty cells will be also included with a nil value.
+    def rows_generator(include_meta_data: false, include_headers: false)
+      path =
+        if @sheetfile.start_with?("/xl/") || @sheetfile.start_with?("xl/")
+          @sheetfile
+        else
+          "xl/#{@sheetfile}"
+        end
+
       if @book.files.file.exist?(path)
         # SAX parsing, Each element in the stream comes through as two events:
         # one to open the element and one to close it.
         opener = Nokogiri::XML::Reader::TYPE_ELEMENT
         closer = Nokogiri::XML::Reader::TYPE_END_ELEMENT
+
         Enumerator.new do |y|
-          row, cells, cell = nil, {}, nil
+          row, cells, cell = nil, [], nil
           row_number = nil
           cell_type  = nil
           cell_style_idx = nil
+
           @book.files.file.open(path) do |xml|
             Nokogiri::XML::Reader.from_io(xml).each do |node|
               node_name = node.name
-              next unless node_name == CELL || node_name == ROW || node_name == VALUE || node_name == TEXT
+              next if node.node_type != opener && node_name != ROW
+
               if node_name == ROW
                 case node.node_type
-                when opener then
+                when opener
                   row = node.attributes
                   row_number = row[ROW_NUMBER]
-                  if spans = row['spans']
+
+                  if (spans = row['spans'])
                     spans = spans.split(":").last.to_i - 1
                   else
                     spans = 0
                   end
+
                   cells = Array.new(spans)
-                  row['cells'] = cells
-                  y << (include_meta_data ? row : cells) if node.self_closing?
+
+                  if node.self_closing?
+                    y << to_formatted_row(row, cells, include_meta_data, include_headers)
+                  end
                 when closer
-                  y << (include_meta_data ? row : cells)
+                  y << to_formatted_row(row, cells, include_meta_data, include_headers)
                 end
-              elsif (node_name == CELL) && node.node_type == opener
+              elsif node_name == CELL
                 attributes = node.attributes
                 cell_type      = attributes[CELL_TYPE]
                 cell_style_idx = attributes[STYLE_INDEX]
                 cell           = attributes[CELL_REF]
-              elsif node_name == VALUE && node.node_type == opener
+              elsif node_name == VALUE
                 if cell
-                  cells[@excel_col_names[cell.sub(row_number, '')]] = convert(node.inner_xml, cell_type, cell_style_idx)
+                  cells[column_indexes[cell.sub(row_number, '')]] = convert(node.inner_xml, cell_type, cell_style_idx)
                 end
-              elsif node_name == TEXT && node.node_type == opener
+              elsif node_name == TEXT
                 if cell
-                  cells[@excel_col_names[cell.sub(row_number, '')]] = convert(node.inner_xml, cell_type, cell_style_idx)
+                  cells[column_indexes[cell.sub(row_number, '')]] = convert(node.inner_xml, cell_type, cell_style_idx)
                 end
               end
             end
@@ -133,6 +165,24 @@ def rows_generator include_meta_data=false
       end
     end
 
+    def to_formatted_row(row, cells, include_meta_data, include_headers)
+      if include_headers
+        row['header_row'] = row[ROW_NUMBER] == @header_row_number
+        cells = cells_with_headers(cells) if @headers
+      end
+
+      if include_meta_data
+        row['cells'] = cells
+        row
+      else
+        cells
+      end
+    end
+
+    def cells_with_headers(cells)
+      cells.empty? ? {} : @headers.zip(cells).to_h
+    end
+
     def convert(value, type, style_idx)
       style = @book.style_types[style_idx.to_i]
       Creek::Styles::Converter.call(value, type, style, converter_options)

diff --git a/lib/creek/styles/constants.rb b/lib/creek/styles/constants.rb
@@ -36,9 +36,6 @@ module Constants
         48 => :bignum,         # ##0.0E+0
         49 => :unsupported     # @
       }
-
-      DATE_SYSTEM_1900 = 25569 #Date.new(1899, 12, 30)
-      DATE_SYSTEM_1904 = Date.new(1904, 1, 1)
     end
   end
 end
diff --git a/lib/creek/styles/converter.rb b/lib/creek/styles/converter.rb
@@ -60,8 +60,10 @@ def self.call(value, type, style, options = {})
           value.to_i
         when :float, :percentage
           value.to_f
-        when :date, :time, :date_time
+        when :date
           convert_date(value, options)
+        when :time, :date_time
+          convert_datetime(value, options)
         when :bignum
           convert_bignum(value)
 
@@ -71,21 +73,25 @@ def self.call(value, type, style, options = {})
         end
       end
 
-      # the trickiest. note that  all these formats can vary on
-      # whether they actually contain a date, time, or datetime.
       def self.convert_date(value, options)
-        value = value.to_f
+        base_date(options) + value.to_i
+      end
 
-        Time.at(((value - DATE_SYSTEM_1900) * 86400).round)
+      def self.convert_datetime(value, options)
+        base_date(options).to_datetime + value.to_f.round(6)
       end
 
       def self.convert_bignum(value)
         if defined?(BigDecimal)
-          BigDecimal.new(value)
+          BigDecimal(value)
         else
           value.to_f
         end
       end
+
+      def self.base_date(options)
+        options.fetch(:base_date, Date.new(1899, 12, 30))
+      end
     end
   end
 end
diff --git a/spec/fixtures/sample_dates.xlsx b/spec/fixtures/sample_dates.xlsx
diff --git a/spec/fixtures/sheets/sample_dates.xlsx b/spec/fixtures/sheets/sample_dates.xlsx
diff --git a/spec/fixtures/sheets/single_data_programme.xlsx b/spec/fixtures/sheets/single_data_programme.xlsx
diff --git a/spec/styles/converter_spec.rb b/spec/styles/converter_spec.rb
@@ -3,13 +3,20 @@
 describe Creek::Styles::Converter do
 
   describe :call do
+
     def convert(value, type, style)
       Creek::Styles::Converter.call(value, type, style)
     end
 
+    describe :date do
+      it "works" do
+        expect(convert('41275', 'n', :date)).to eq(Date.new(2013,01,01))
+      end
+    end
+
     describe :date_time do
       it "works" do
-        expect(convert('41275', 'n', :date_time)).to eq(Date.new(2013,01,01))
+        expect(convert('41275', 'n', :date_time)).to eq(DateTime.new(2013,01,01))
       end
     end
   end