From c2bb7b8614f4ff1dff6b7bdbda0ded125ae549c7 Mon Sep 17 00:00:00 2001 From: Ben Woosley Date: Sat, 22 Nov 2014 14:59:05 -0800 Subject: [PATCH] Move Excelx cell management into Excelx::Sheet, and the new Excelx::Cell. Move the existing SharedStrings, Styles, and Workbook management into new extractor classes. --- lib/roo/excelx.rb | 741 ++++++++++++++----------------- lib/roo/excelx/extractor.rb | 20 + lib/roo/excelx/shared_strings.rb | 34 ++ lib/roo/excelx/styles.rb | 61 +++ lib/roo/excelx/workbook.rb | 52 +++ test/test_roo.rb | 4 +- 6 files changed, 506 insertions(+), 406 deletions(-) create mode 100644 lib/roo/excelx/extractor.rb create mode 100644 lib/roo/excelx/shared_strings.rb create mode 100644 lib/roo/excelx/styles.rb create mode 100644 lib/roo/excelx/workbook.rb diff --git a/lib/roo/excelx.rb b/lib/roo/excelx.rb index 64637a02..6353caf5 100644 --- a/lib/roo/excelx.rb +++ b/lib/roo/excelx.rb @@ -3,6 +3,10 @@ require 'roo/link' class Roo::Excelx < Roo::Base + autoload :Workbook, 'roo/excelx/workbook' + autoload :SharedStrings, 'roo/excelx/shared_strings' + autoload :Styles, 'roo/excelx/styles' + module Format EXCEPTIONAL_FORMATS = { 'h:mm am/pm' => :date, @@ -64,44 +68,256 @@ def to_type(format) module_function :to_type end + class Cell + attr_reader :type, :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink + + def initialize(value, type, formula, excelx_type, excelx_value, style, hyperlink, base_date) + @type = type + @formula = formula + @base_date = base_date if [:date, :datetime].include?(@type) + @excelx_type = excelx_type + @excelx_value = excelx_value + @style = style + @value = type_cast_value(value) + if hyperlink + @value = Roo::Link.new(hyperlink, @value.to_s) + end + end + + def type + if @formula + :formula + elsif @value.is_a?(Roo::Link) + :link + else + @type + end + end + + private + + def type_cast_value(value) + case @type + when :float, :percentage + value.to_f + when :date + yyyy,mm,dd = (@base_date+value.to_i).strftime("%Y-%m-%d").split('-') + Date.new(yyyy.to_i,mm.to_i,dd.to_i) + when :datetime + create_datetime_from((@base_date+value.to_f.round(6)).strftime("%Y-%m-%d %H:%M:%S.%N")) + when :time + value.to_f*(24*60*60) + when :string + value + else + value + end + end + + def create_datetime_from(datetime_string) + date_part,time_part = round_time_from(datetime_string).split(' ') + yyyy,mm,dd = date_part.split('-') + hh,mi,ss = time_part.split(':') + DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i) + end + + def round_time_from(datetime_string) + date_part,time_part = datetime_string.split(' ') + yyyy,mm,dd = date_part.split('-') + hh,mi,ss = time_part.split(':') + Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0).strftime("%Y-%m-%d %H:%M:%S") + end + end + class Sheet - def initialize(name, rels_doc, sheet_doc, comments_doc) + def initialize(name, rels_doc, sheet_doc, comments_doc, styles, shared_strings, workbook) @name = name @rels_doc = rels_doc @sheet_doc = sheet_doc @comments_doc = comments_doc + @styles = styles + @shared_strings = shared_strings + @workbook = workbook + end + + def cells + @cells ||= Hash[@sheet_doc.xpath("/worksheet/sheetData/row/c").map do |cell_xml| + key = ref_to_key(cell_xml['r']) + [key, cell_from_xml(cell_xml, hyperlinks[key])] + end] + end + + def present_cells + @present_cells ||= cells.select {|key, cell| cell && cell.value } + end + + def row(row_number) + first_column.upto(last_column).map do |col| + cells[[row_number,col]] + end.map {|cell| cell && cell.value } + end + + def column(col_number) + first_row.upto(last_row).map do |row| + cells[[row,col_number]] + end.map {|cell| cell && cell.value } + end + + # returns the number of the first non-empty row + def first_row + @first_row ||= present_cells.keys.map {|row, col| row }.min + end + + def last_row + @last_row ||= present_cells.keys.map {|row, col| row }.max + end + + # returns the number of the first non-empty column + def first_column(sheet=nil) + @first_column ||= present_cells.keys.map {|row, col| col }.min end - def comment(key) - comments[key] + # returns the number of the last non-empty column + def last_column(sheet=nil) + @last_column ||= present_cells.keys.map {|row, col| col }.max + end + + def excelx_format(key) + @styles.style_format(cells[key].style).to_s end def comments @comments ||= if @comments_doc Hash[@comments_doc.xpath("//comments/commentList/comment").map do |comment| - [ref_to_key(comment), comment.at_xpath('./text/r/t').text ] + [ref_to_key(comment.attributes['ref'].to_s), comment.at_xpath('./text/r/t').text] end] else {} end end - def hyperlink(key) - hyperlinks[key] + private + +=begin +Datei xl/comments1.xml + + + + + + + + + + + + + + + Kommentar fuer B4 + + + + + + + + + + + + Kommentar fuer B5 + + + + + +=end +=begin + if @comments_doc[self.sheets.index(sheet)] + read_comments(sheet) end +=end - private + def cell_from_xml(cell_xml, hyperlink) + style = cell_xml['s'].to_i # should be here + # c: + # 22606 + # , format: , tmp_type: float + value_type = + case cell_xml['t'] + when 's' + :shared + when 'b' + :boolean + # 2011-02-25 BEGIN + when 'str' + :string + # 2011-02-25 END + # 2011-09-15 BEGIN + when 'inlineStr' + :inlinestr + # 2011-09-15 END + else + format = @styles.style_format(style) + Format.to_type(format) + end + formula = nil + cell_xml.children.each do |cell| + case cell.name + when 'is' + cell.children.each do |inline_str| + if inline_str.name == 't' + return Cell.new(inline_str.content,:string,formula,:string,inline_str.content,style, hyperlink, @workbook.base_date) + end + end + when 'f' + formula = cell.content + when 'v' + if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0 + value_type = + if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001 + :datetime + else + :date + end + end + excelx_type = [:numeric_or_formula,format.to_s] + value = + case value_type + when :shared + value_type = :string + excelx_type = :string + @shared_strings[cell.content.to_i] + when :boolean + (cell.content.to_i == 1 ? 'TRUE' : 'FALSE') + when :date, :time, :datetime + cell.content + when :formula + cell.content.to_f + when :string + excelx_type = :string + cell.content + else + value_type = :float + cell.content + end + return Cell.new(value,value_type,formula,excelx_type,cell.content,style, hyperlink, @workbook.base_date) + end + end + nil + end - def ref_to_key(element) - Roo::Base.split_coordinate(element.attributes['ref'].to_s) + def ref_to_key(ref) + Roo::Base.split_coordinate(ref) end def hyperlinks @hyperlinks ||= Hash[@sheet_doc.xpath("/worksheet/hyperlinks/hyperlink").map do |hyperlink| if hyperlink.attribute('id') && relationship = relationships[hyperlink.attribute('id').text] - [ref_to_key(hyperlink), relationship.attribute('Target').text] + [ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text] end end.compact] end @@ -116,6 +332,13 @@ def relationships end end + + class << self + def load_xml(path) + super.remove_namespaces! + end + end + # initialization and opening of a spreadsheet file # values for packed: :zip def initialize(filename, options = {}) @@ -129,25 +352,18 @@ def initialize(filename, options = {}) @comments_files = [] @rels_files = [] process_zipfile(@tmpdir, @filename) + @sheet_doc = load_xmls(@sheet_files) @comments_doc = load_xmls(@comments_files) @rels_doc = load_xmls(@rels_files) + @sheets = [] super(filename, options) - @formula = {} - @excelx_type = {} - @excelx_value = {} - @style = {} - end - - def method_missing(m,*args) - # is method name a label name - read_labels - if @label.has_key?(m.to_s) - sheet ||= default_sheet - read_cells(sheet) - row,col = label(m.to_s) - cell(row,col) + end + + def method_missing(method,*args) + if label = workbook.defined_names[method.to_s] + sheet_for(label.sheet).cells[label.key].value else # call super for methods like #a1 super @@ -159,7 +375,8 @@ def sheet_for(sheet) validate_sheet!(sheet) n = self.sheets.index(sheet) - Sheet.new(sheet, @rels_doc[n], @sheet_doc[n], @comments_doc[n]) + @sheets[n] ||= + Sheet.new(sheet, @rels_doc[n], @sheet_doc[n], @comments_doc[n], styles, shared_strings, workbook) end # Returns the content of a spreadsheet-cell. @@ -167,70 +384,80 @@ def sheet_for(sheet) # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the # cell at the first line and first row. def cell(row, col, sheet=nil) - sheet ||= default_sheet - sheet_object = sheet_for(sheet) - read_cells(sheet) - row,col = key = normalize(row,col) - case celltype(row,col,sheet) - when :date - yyyy,mm,dd = @cell[sheet][key].split('-') - Date.new(yyyy.to_i,mm.to_i,dd.to_i) - when :datetime - create_datetime_from(@cell[sheet][key]) - when :link - Roo::Link.new(sheet_object.hyperlink(key), @cell[sheet][key].to_s) - else - @cell[sheet][key] + key = normalize(row,col) + cell = sheet_for(sheet).cells[key] + cell.value if cell + end + + def row(rownumber,sheet=nil) + sheet_for(sheet).row(rownumber) + end + + # returns all values in this column as an array + # column numbers are 1,2,3,... like in the spreadsheet + def column(column_number,sheet=nil) + if column_number.class == String + column_number = self.class.letter_to_number(column_number) end + sheet_for(sheet).column(column_number) + end + + # returns the number of the first non-empty row + def first_row(sheet=nil) + sheet_for(sheet).first_row + end + + # returns the number of the last non-empty row + def last_row(sheet=nil) + sheet_for(sheet).last_row end + # returns the number of the first non-empty column + def first_column(sheet=nil) + sheet_for(sheet).first_column + end + + # returns the number of the last non-empty column + def last_column(sheet=nil) + sheet_for(sheet).last_column + end + + # set a cell to a certain value + # (this will not be saved back to the spreadsheet file!) + def set(row,col,value,sheet=nil) #:nodoc: + key = normalize(row,col) + cell_type = + case value + when Fixnum then :float + when String, Float then :string + else + raise ArgumentError, "Type for #{value} not set" + end + sheet_for(sheet).cells[key] = Cell.new(value, cell_type, nil, cell_type, value, nil, nil, nil) + end + + # Returns the formula at (row,col). # Returns nil if there is no formula. # The method #formula? checks if there is a formula. def formula(row,col,sheet=nil) - sheet ||= default_sheet - read_cells(sheet) - row,col = normalize(row,col) - @formula[sheet][[row,col]] && @formula[sheet][[row,col]] + key = normalize(row,col) + sheet_for(sheet).cells[key].formula end alias_method :formula?, :formula # returns each formula in the selected sheet as an array of elements # [row, col, formula] def formulas(sheet=nil) - sheet ||= default_sheet - read_cells(sheet) - if @formula[sheet] - @formula[sheet].map do |coord, formula| - [coord[0], coord[1], formula] - end - else - [] - end - end - - class Font - attr_accessor :bold, :italic, :underline - - def bold? - @bold == true - end - - def italic? - @italic == true - end - - def underline? - @underline == true + sheet_for(sheet).cells.select {|key, cell| cell.formula }.map do |(x, y), cell| + [x, y, cell.formula] end end # Given a cell, return the cell's style def font(row, col, sheet=nil) - sheet ||= default_sheet - read_cells(sheet) - row,col = normalize(row,col) - style_definitions[@style[sheet][[row,col]].to_i] + key = normalize(row,col) + styles.definitions[sheet_for(sheet).cells[key].style] end # returns the type of a cell: @@ -242,18 +469,8 @@ def font(row, col, sheet=nil) # * :time # * :datetime def celltype(row,col,sheet=nil) - sheet ||= default_sheet - read_cells(sheet) - sheet_object = sheet_for(sheet) - - key = normalize(row,col) - if @formula[sheet][key] - :formula - elsif sheet_object.hyperlink(key) - :link - else - @cell_type[sheet][key] - end + key = normalize(row, col) + sheet_for(sheet).cells[key].type end # returns the internal type of an excel cell @@ -261,68 +478,66 @@ def celltype(row,col,sheet=nil) # * :string # Note: this is only available within the Excelx class def excelx_type(row,col,sheet=nil) - sheet ||= default_sheet - read_cells(sheet) - row,col = normalize(row,col) - @excelx_type[sheet][[row,col]] + key = normalize(row,col) + sheet_for(sheet).cells[key].excelx_type end # returns the internal value of an excelx cell # Note: this is only available within the Excelx class def excelx_value(row,col,sheet=nil) - sheet ||= default_sheet - read_cells(sheet) - row,col = normalize(row,col) - @excelx_value[sheet][[row,col]] + key = normalize(row,col) + sheet_for(sheet).cells[key].excelx_value end # returns the internal format of an excel cell def excelx_format(row,col,sheet=nil) - sheet ||= default_sheet - read_cells(sheet) - row,col = normalize(row,col) - style_format(@style[sheet][[row,col]]).to_s + key = normalize(row,col) + sheet_for(sheet).excelx_format(key) end # returns an array of sheet names in the spreadsheet def sheets - workbook_doc.xpath("//sheet").map do |sheet| + workbook.sheets.map do |sheet| sheet['name'] end end + def empty?(row,col,sheet=nil) + sheet = sheet_for(sheet) + key = normalize(row,col) + cell = sheet.cells[key] + !cell || !cell.value || (cell.type == :string && cell.value.empty?) \ + || (row < sheet.first_row || row > sheet.last_row || col < sheet.first_column || col > sheet.last_column) + + end + # shows the internal representation of all cells # for debugging purposes def to_s(sheet=nil) - sheet ||= default_sheet - read_cells(sheet) - @cell[sheet].inspect + sheet_for(sheet).cells.inspect end # returns the row,col values of the labelled cell # (nil,nil) if label is not defined - def label(labelname) - read_labels - if @label.empty? || !@label.has_key?(labelname) + def label(name) + labels = workbook.defined_names + if labels.empty? || !labels.key?(name) [nil,nil,nil] else - [@label[labelname][1].to_i, - self.class.letter_to_number(@label[labelname][2]), - @label[labelname][0]] + [labels[name].row, + labels[name].col, + labels[name].sheet] end end # Returns an array which all labels. Each element is an array with # [labelname, [row,col,sheetname]] def labels - # sheet ||= default_sheet - # read_cells(sheet) - read_labels - @label.map do |label| - [ label[0], # name - [ label[1][1].to_i, # row - self.class.letter_to_number(label[1][2]), # column - label[1][0], # sheet + @labels ||= workbook.defined_names.map do |name, label| + [ name, + [ label.row, + label.col, + label.sheet, ] ] end end @@ -335,14 +550,14 @@ def hyperlink?(row,col,sheet=nil) # nil if there is no hyperlink def hyperlink(row,col,sheet=nil) key = normalize(row,col) - sheet_for(sheet).hyperlink(key) + sheet_for(sheet).hyperlinks[key] end # returns the comment at (row/col) # nil if there is no comment def comment(row,col,sheet=nil) key = normalize(row,col) - sheet_for(sheet).comment(key) + sheet_for(sheet).comments[key] end # true, if there is a comment @@ -358,198 +573,10 @@ def comments(sheet=nil) private - def workbook_doc - @workbook_doc ||= load_xml(File.join(@tmpdir, "roo_workbook.xml")) - end - - def load_xml(path) - super.remove_namespaces! - end - def load_xmls(paths) paths.compact.map do |item| - load_xml(item).remove_namespaces! - end - end - - # helper function to set the internal representation of cells - def set_cell_values(sheet,x,y,i,v,value_type,formula, - excelx_type=nil, - excelx_value=nil, - style=nil) - key = [y,x+i] - @cell_type[sheet] ||= {} - @cell_type[sheet][key] = value_type - @formula[sheet] ||= {} - @formula[sheet][key] = formula if formula - @cell[sheet] ||= {} - @cell[sheet][key] = - case @cell_type[sheet][key] - when :float - v.to_f - when :string - v - when :date - (base_date+v.to_i).strftime("%Y-%m-%d") - when :datetime - (base_date+v.to_f.round(6)).strftime("%Y-%m-%d %H:%M:%S.%N") - when :percentage - v.to_f - when :time - v.to_f*(24*60*60) - else - v - end - - @excelx_type[sheet] ||= {} - @excelx_type[sheet][key] = excelx_type - @excelx_value[sheet] ||= {} - @excelx_value[sheet][key] = excelx_value - @style[sheet] ||= {} - @style[sheet][key] = style - end - - def read_cell_from_xml(sheet, cell_xml) - style = cell_xml['s'].to_i # should be here - # c: - # 22606 - # , format: , tmp_type: float - value_type = - case cell_xml['t'] - when 's' - :shared - when 'b' - :boolean - # 2011-02-25 BEGIN - when 'str' - :string - # 2011-02-25 END - # 2011-09-15 BEGIN - when 'inlineStr' - :inlinestr - # 2011-09-15 END - else - format = style_format(style) - Format.to_type(format) - end - formula = nil - cell_xml.children.each do |cell| - case cell.name - when 'is' - cell.children.each do |is| - if is.name == 't' - inlinestr_content = is.content - value_type = :string - v = inlinestr_content - excelx_type = :string - y, x = self.class.split_coordinate(cell_xml['r']) - excelx_value = inlinestr_content #cell.content - set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,style) - end - end - when 'f' - formula = cell.content - when 'v' - if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0 - value_type = - if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001 - :datetime - else - :date - end - end - excelx_type = [:numeric_or_formula,format.to_s] - excelx_value = cell.content - v = - case value_type - when :shared - value_type = :string - excelx_type = :string - shared_strings[cell.content.to_i] - when :boolean - (cell.content.to_i == 1 ? 'TRUE' : 'FALSE') - when :date - cell.content - when :time - cell.content - when :datetime - cell.content - when :formula - cell.content.to_f #TODO: !!!! - when :string - excelx_type = :string - cell.content - else - value_type = :float - cell.content - end - y, x = self.class.split_coordinate(cell_xml['r']) - set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,style) - end - end - end - - # read all cells in the selected sheet - def read_cells(sheet=nil) - sheet ||= default_sheet - validate_sheet!(sheet) - return if @cells_read[sheet] - - @sheet_doc[sheets.index(sheet)].xpath("/worksheet/sheetData/row/c").each do |c| - read_cell_from_xml(sheet, c) - end - @cells_read[sheet] = true - # begin comments -=begin -Datei xl/comments1.xml - - - - - - - - - - - - - - - Kommentar fuer B4 - - - - - - - - - - - - Kommentar fuer B5 - - - - - -=end -=begin - if @comments_doc[self.sheets.index(sheet)] - read_comments(sheet) + load_xml(item) end -=end - #end comments - end - - def read_labels - @label ||= Hash[workbook_doc.xpath("//definedName").map do |defined_name| - # "Sheet1!$C$5" - sheet, coordinates = defined_name.text.split('!$', 2) - col,row = coordinates.split('$') - [defined_name['name'], [sheet,row,col]] - end] end # Extracts all needed files from the zip file @@ -595,109 +622,15 @@ def process_zipfile(tmpdir, zipfilename) end end - def shared_strings - @shared_strings ||= - if File.exist?(shared_strings_path) - # read the shared strings xml document - xml = load_xml(shared_strings_path) - xml.xpath("/sst/si").map do |si| - shared_string = '' - si.children.each do |elem| - case elem.name - when 'r' - elem.children.each do |r_elem| - if r_elem.name == 't' - shared_string << r_elem.content - end - end - when 't' - shared_string = elem.content - end - end - shared_string - end - else - [] - end + def styles + @styles ||= Styles.new(File.join(@tmpdir, 'roo_styles.xml')) end - def shared_strings_path - @shared_strings_path ||= File.join(@tmpdir, 'roo_sharedStrings.xml') - end - - ##### STYLES - def style_definitions - @style_definitions ||= styles_doc.xpath("//cellXfs").flat_map do |xfs| - xfs.children.map do |xf| - fonts[xf['fontId'].to_i] - end - end - end - - def num_fmt_ids - @num_fmt_ids ||= styles_doc.xpath("//cellXfs").flat_map do |xfs| - xfs.children.map do |xf| - xf['numFmtId'] - end - end - end - - def num_fmts - @num_fmts ||= Hash[styles_doc.xpath("//numFmt").map do |num_fmt| - [num_fmt['numFmtId'], num_fmt['formatCode']] - end] - end - - def fonts - @fonts ||= styles_doc.xpath("//fonts/font").map do |font_el| - Font.new.tap do |font| - font.bold = !font_el.xpath('./b').empty? - font.italic = !font_el.xpath('./i').empty? - font.underline = !font_el.xpath('./u').empty? - end - end - end - - def styles_doc - @styles_doc ||= - if File.exist?(File.join(@tmpdir, 'roo_styles.xml')) - load_xml(File.join(@tmpdir, 'roo_styles.xml')) - end - end - - # convert internal excelx attribute to a format - def style_format(style) - id = num_fmt_ids[style.to_i] - num_fmts[id] || Format::STANDARD_FORMATS[id.to_i] - end - ###### END STYLES - - def base_date - @base_date ||= - begin - # Default to 1900 (minus one day due to excel quirk) but use 1904 if - # it's set in the Workbook's workbookPr - # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx - workbook_doc.css("workbookPr[date1904]").each do |workbookPr| - if workbookPr["date1904"] =~ /true|1/i - return Date.new(1904,01,01) - end - end - Date.new(1899,12,30) - end - end - - def create_datetime_from(datetime_string) - date_part,time_part = round_time_from(datetime_string).split(' ') - yyyy,mm,dd = date_part.split('-') - hh,mi,ss = time_part.split(':') - DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i) + def shared_strings + @shared_strings ||= SharedStrings.new(File.join(@tmpdir, 'roo_sharedStrings.xml')) end - def round_time_from(datetime_string) - date_part,time_part = datetime_string.split(' ') - yyyy,mm,dd = date_part.split('-') - hh,mi,ss = time_part.split(':') - Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0).strftime("%Y-%m-%d %H:%M:%S") + def workbook + @workbook ||= Workbook.new(File.join(@tmpdir, "roo_workbook.xml")) end end diff --git a/lib/roo/excelx/extractor.rb b/lib/roo/excelx/extractor.rb new file mode 100644 index 00000000..565e3cd5 --- /dev/null +++ b/lib/roo/excelx/extractor.rb @@ -0,0 +1,20 @@ +module Roo + class Excelx::Extractor + def initialize(path) + @path = path + end + + private + + def doc + @doc ||= + if doc_exists? + Roo::Excelx.load_xml(@path) + end + end + + def doc_exists? + File.exist?(@path) + end + end +end diff --git a/lib/roo/excelx/shared_strings.rb b/lib/roo/excelx/shared_strings.rb new file mode 100644 index 00000000..feed958c --- /dev/null +++ b/lib/roo/excelx/shared_strings.rb @@ -0,0 +1,34 @@ +require 'roo/excelx/extractor' + +module Roo + class Excelx::SharedStrings < Excelx::Extractor + def [](index) + to_a[index] + end + + def to_a + @array ||= + if doc_exists? + # read the shared strings xml document + doc.xpath("/sst/si").map do |si| + shared_string = '' + si.children.each do |elem| + case elem.name + when 'r' + elem.children.each do |r_elem| + if r_elem.name == 't' + shared_string << r_elem.content + end + end + when 't' + shared_string = elem.content + end + end + shared_string + end + else + [] + end + end + end +end diff --git a/lib/roo/excelx/styles.rb b/lib/roo/excelx/styles.rb new file mode 100644 index 00000000..98fbdd5c --- /dev/null +++ b/lib/roo/excelx/styles.rb @@ -0,0 +1,61 @@ +require 'roo/excelx/extractor' + +module Roo + class Excelx::Styles < Excelx::Extractor + class Font + attr_accessor :bold, :italic, :underline + + def bold? + @bold == true + end + + def italic? + @italic == true + end + + def underline? + @underline == true + end + end + + # convert internal excelx attribute to a format + def style_format(style) + id = num_fmt_ids[style.to_i] + num_fmts[id] || Excelx::Format::STANDARD_FORMATS[id.to_i] + end + + def definitions + @definitions ||= doc.xpath("//cellXfs").flat_map do |xfs| + xfs.children.map do |xf| + fonts[xf['fontId'].to_i] + end + end + end + + private + + def num_fmt_ids + @num_fmt_ids ||= doc.xpath("//cellXfs").flat_map do |xfs| + xfs.children.map do |xf| + xf['numFmtId'] + end + end + end + + def num_fmts + @num_fmts ||= Hash[doc.xpath("//numFmt").map do |num_fmt| + [num_fmt['numFmtId'], num_fmt['formatCode']] + end] + end + + def fonts + @fonts ||= doc.xpath("//fonts/font").map do |font_el| + Font.new.tap do |font| + font.bold = !font_el.xpath('./b').empty? + font.italic = !font_el.xpath('./i').empty? + font.underline = !font_el.xpath('./u').empty? + end + end + end + end +end diff --git a/lib/roo/excelx/workbook.rb b/lib/roo/excelx/workbook.rb new file mode 100644 index 00000000..537aaae2 --- /dev/null +++ b/lib/roo/excelx/workbook.rb @@ -0,0 +1,52 @@ +require 'roo/excelx/extractor' + +module Roo + class Excelx::Workbook < Excelx::Extractor + class Label + attr_reader :sheet, :row, :col, :name + + def initialize(name, sheet, row, col) + @name = name + @sheet = sheet + @row = row.to_i + @col = Roo::Base.letter_to_number(col) + end + + def key + [@row, @col] + end + end + + def sheets + doc.xpath("//sheet") + end + + # aka labels + def defined_names + Hash[doc.xpath("//definedName").map do |defined_name| + # "Sheet1!$C$5" + sheet, coordinates = defined_name.text.split('!$', 2) + col,row = coordinates.split('$') + name = defined_name['name'] + [name, Label.new(name, sheet,row,col)] + end] + end + + def base_date + @base_date ||= + begin + # Default to 1900 (minus one day due to excel quirk) but use 1904 if + # it's set in the Workbook's workbookPr + # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx + result = Date.new(1899,12,30) # default + doc.xpath("//workbookPr[date1904]").each do |workbookPr| + if workbookPr["date1904"] =~ /true|1/i + result = Date.new(1904,01,01) + break + end + end + result + end + end + end +end diff --git a/test/test_roo.rb b/test/test_roo.rb index d2bac112..7b269a83 100644 --- a/test/test_roo.rb +++ b/test/test_roo.rb @@ -1104,12 +1104,12 @@ def test_simple2_excelx def test_datetime with_each_spreadsheet(:name=>'datetime') do |oo| val = oo.cell('c',3) - assert_kind_of DateTime, val assert_equal :datetime, oo.celltype('c',3) assert_equal DateTime.new(1961,11,21,12,17,18), val + assert_kind_of DateTime, val val = oo.cell('a',1) - assert_kind_of Date, val assert_equal :date, oo.celltype('a',1) + assert_kind_of Date, val assert_equal Date.new(1961,11,21), val assert_equal Date.new(1961,11,21), oo.cell('a',1) assert_equal DateTime.new(1961,11,21,12,17,18), oo.cell('a',3)