diff --git a/lib/roo/excelx.rb b/lib/roo/excelx.rb index 31f83487..d4e00ae3 100644 --- a/lib/roo/excelx.rb +++ b/lib/roo/excelx.rb @@ -1,667 +1,490 @@ -require 'date' require 'nokogiri' +require 'zip/filesystem' require 'roo/link' require 'roo/utils' -require 'zip/filesystem' -class Roo::Excelx < Roo::Base - autoload :Workbook, 'roo/excelx/workbook' - autoload :SharedStrings, 'roo/excelx/shared_strings' - autoload :Styles, 'roo/excelx/styles' - - autoload :Relationships, 'roo/excelx/relationships' - autoload :Comments, 'roo/excelx/comments' - autoload :SheetDoc, 'roo/excelx/sheet_doc' - - module Format - EXCEPTIONAL_FORMATS = { - 'h:mm am/pm' => :date, - 'h:mm:ss am/pm' => :date, - } - - STANDARD_FORMATS = { - 0 => 'General', - 1 => '0', - 2 => '0.00', - 3 => '#,##0', - 4 => '#,##0.00', - 9 => '0%', - 10 => '0.00%', - 11 => '0.00E+00', - 12 => '# ?/?', - 13 => '# ??/??', - 14 => 'mm-dd-yy', - 15 => 'd-mmm-yy', - 16 => 'd-mmm', - 17 => 'mmm-yy', - 18 => 'h:mm AM/PM', - 19 => 'h:mm:ss AM/PM', - 20 => 'h:mm', - 21 => 'h:mm:ss', - 22 => 'm/d/yy h:mm', - 37 => '#,##0 ;(#,##0)', - 38 => '#,##0 ;[Red](#,##0)', - 39 => '#,##0.00;(#,##0.00)', - 40 => '#,##0.00;[Red](#,##0.00)', - 45 => 'mm:ss', - 46 => '[h]:mm:ss', - 47 => 'mmss.0', - 48 => '##0.0E+0', - 49 => '@', - } - - def to_type(format) - format = format.to_s.downcase - if type = EXCEPTIONAL_FORMATS[format] - type - elsif format.include?('#') - :float - elsif !format.match(/d+(?![\]])/).nil? 
|| format.include?('y') - if format.include?('h') || format.include?('s') - :datetime +module Roo + class Excelx < Roo::Base + require 'roo/excelx/workbook' + require 'roo/excelx/shared_strings' + require 'roo/excelx/styles' + require 'roo/excelx/cell' + require 'roo/excelx/sheet' + require 'roo/excelx/relationships' + require 'roo/excelx/comments' + require 'roo/excelx/sheet_doc' + + module Format + EXCEPTIONAL_FORMATS = { + 'h:mm am/pm' => :date, + 'h:mm:ss am/pm' => :date + } + + STANDARD_FORMATS = { + 0 => 'General'.freeze, + 1 => '0'.freeze, + 2 => '0.00'.freeze, + 3 => '#,##0'.freeze, + 4 => '#,##0.00'.freeze, + 9 => '0%'.freeze, + 10 => '0.00%'.freeze, + 11 => '0.00E+00'.freeze, + 12 => '# ?/?'.freeze, + 13 => '# ??/??'.freeze, + 14 => 'mm-dd-yy'.freeze, + 15 => 'd-mmm-yy'.freeze, + 16 => 'd-mmm'.freeze, + 17 => 'mmm-yy'.freeze, + 18 => 'h:mm AM/PM'.freeze, + 19 => 'h:mm:ss AM/PM'.freeze, + 20 => 'h:mm'.freeze, + 21 => 'h:mm:ss'.freeze, + 22 => 'm/d/yy h:mm'.freeze, + 37 => '#,##0 ;(#,##0)'.freeze, + 38 => '#,##0 ;[Red](#,##0)'.freeze, + 39 => '#,##0.00;(#,##0.00)'.freeze, + 40 => '#,##0.00;[Red](#,##0.00)'.freeze, + 45 => 'mm:ss'.freeze, + 46 => '[h]:mm:ss'.freeze, + 47 => 'mmss.0'.freeze, + 48 => '##0.0E+0'.freeze, + 49 => '@'.freeze + } + + def to_type(format) + format = format.to_s.downcase + if (type = EXCEPTIONAL_FORMATS[format]) + type + elsif format.include?('#') + :float + elsif !format.match(/d+(?![\]])/).nil? 
|| format.include?('y') + if format.include?('h') || format.include?('s') + :datetime + else + :date + end + elsif format.include?('h') || format.include?('s') + :time + elsif format.include?('%') + :percentage else - :date + :float end - elsif format.include?('h') || format.include?('s') - :time - elsif format.include?('%') - :percentage - else - :float end + + module_function :to_type end - module_function :to_type - end + ExceedsMaxError = Class.new(StandardError) - class Cell - attr_reader :type, :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate - attr_writer :value - - def initialize(value, type, formula, excelx_type, excelx_value, style, hyperlink, base_date, coordinate) - @type = type - @formula = formula - @base_date = base_date if [:date, :datetime].include?(@type) - @excelx_type = excelx_type - @excelx_value = excelx_value - @style = style - @value = type_cast_value(value) - @value = Roo::Link.new(hyperlink, @value.to_s) if hyperlink - @coordinate = coordinate - end - - def type - if @formula - :formula - elsif @value.is_a?(Roo::Link) - :link - else - @type + # initialization and opening of a spreadsheet file + # values for packed: :zip + # optional cell_max (int) parameter for early aborting attempts to parse + # enormous documents. 
+ def initialize(filename_or_stream, options = {}) + packed = options[:packed] + file_warning = options.fetch(:file_warning, :error) + cell_max = options.delete(:cell_max) + sheet_options = {} + sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false) + + unless is_stream?(filename_or_stream) + file_type_check(filename_or_stream, '.xlsx', 'an Excel-xlsx', file_warning, packed) + basename = File.basename(filename_or_stream) end - end - class Coordinate - attr_accessor :row, :column + @tmpdir = make_tmpdir(basename, options[:tmpdir_root]) + @filename = local_filename(filename_or_stream, @tmpdir, packed) + @comments_files = [] + @rels_files = [] + process_zipfile(@filename || filename_or_stream) - def initialize(row, column) - @row, @column = row, column + @sheet_names = workbook.sheets.map do |sheet| + unless options[:only_visible_sheets] && sheet['state'] == 'hidden' + sheet['name'] + end + end.compact + @sheets = [] + @sheets_by_name = Hash[@sheet_names.map.with_index do |sheet_name, n| + @sheets[n] = Sheet.new(sheet_name, @rels_files[n], @sheet_files[n], @comments_files[n], styles, shared_strings, workbook, sheet_options) + [sheet_name, @sheets[n]] + end] + + if cell_max + cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions) + raise ExceedsMaxError.new("Excel file exceeds cell maximum: #{cell_count} > #{cell_max}") if cell_count > cell_max end - end - private + super + rescue => e # clean up any temp files, but only if an error was raised + close + raise e + end - def type_cast_value(value) - case @type - when :float, :percentage - value.to_f - when :date - yyyy,mm,dd = (@base_date+value.to_i).strftime("%Y-%m-%d").split('-') - Date.new(yyyy.to_i,mm.to_i,dd.to_i) - when :datetime - create_datetime_from((@base_date+value.to_f.round(6)).strftime("%Y-%m-%d %H:%M:%S.%N")) - when :time - value.to_f*(24*60*60) - when :string - value + def method_missing(method, *args) + if (label = 
workbook.defined_names[method.to_s]) + safe_send(sheet_for(label.sheet).cells[label.key], :value) else - value + # call super for methods like #a1 + super end end - def create_datetime_from(datetime_string) - date_part,time_part = round_time_from(datetime_string).split(' ') - yyyy,mm,dd = date_part.split('-') - hh,mi,ss = time_part.split(':') - DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i) - end - - def round_time_from(datetime_string) - date_part,time_part = datetime_string.split(' ') - yyyy,mm,dd = date_part.split('-') - hh,mi,ss = time_part.split(':') - Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0).strftime("%Y-%m-%d %H:%M:%S") + def sheets + @sheet_names end - end - class Sheet - def initialize(name, rels_path, sheet_path, comments_path, styles, shared_strings, workbook, options = {}) - @name = name - @rels = Relationships.new(rels_path) - @comments = Comments.new(comments_path) - @styles = styles - @sheet = SheetDoc.new(sheet_path, @rels, @styles, shared_strings, workbook, options) + def sheet_for(sheet) + sheet ||= default_sheet + validate_sheet!(sheet) + @sheets_by_name[sheet] end - def cells - @cells ||= @sheet.cells(@rels) + # Returns the content of a spreadsheet-cell. + # (1,1) is the upper left corner. + # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the + # cell at the first line and first row. 
+ def cell(row, col, sheet = nil) + key = normalize(row, col) + safe_send(sheet_for(sheet).cells[key], :value) end - def present_cells - @present_cells ||= cells.select {|key, cell| cell && cell.value } + def row(rownumber, sheet = nil) + sheet_for(sheet).row(rownumber) end - # Yield each row as array of Excelx::Cell objects - # accepts options max_rows (int) (offset by 1 for header), - # pad_cells (boolean) and offset (int) - def each_row(options = {}, &block) - row_count = 0 - options[:offset] ||= 0 - @sheet.each_row_streaming do |row| - break if options[:max_rows] && row_count == options[:max_rows] + options[:offset] + 1 - if block_given? && !(options[:offset] && row_count < options[:offset]) - block.call(cells_for_row_element(row, options)) - end - row_count += 1 + # returns all values in this column as an array + # column numbers are 1,2,3,... like in the spreadsheet + def column(column_number, sheet = nil) + if column_number.is_a?(::String) + column_number = ::Roo::Utils.letter_to_number(column_number) end - end - - def row(row_number) - first_column.upto(last_column).map do |col| - cells[[row_number,col]] - end.map {|cell| cell && cell.value } - end - - def column(col_number) - first_row.upto(last_row).map do |row| - cells[[row,col_number]] - end.map {|cell| cell && cell.value } + sheet_for(sheet).column(column_number) end # returns the number of the first non-empty row - def first_row - @first_row ||= present_cells.keys.map {|row, _| row }.min + def first_row(sheet = nil) + sheet_for(sheet).first_row end - def last_row - @last_row ||= present_cells.keys.map {|row, _| row }.max + # returns the number of the last non-empty row + def last_row(sheet = nil) + sheet_for(sheet).last_row end # returns the number of the first non-empty column - def first_column - @first_column ||= present_cells.keys.map {|_, col| col }.min + def first_column(sheet = nil) + sheet_for(sheet).first_column end # returns the number of the last non-empty column - def last_column - 
@last_column ||= present_cells.keys.map {|_, col| col }.max - end - - def excelx_format(key) - cell = cells[key] - @styles.style_format(cell.style).to_s if cell + def last_column(sheet = nil) + sheet_for(sheet).last_column end - def hyperlinks - @hyperlinks ||= @sheet.hyperlinks(@rels) + # set a cell to a certain value + # (this will not be saved back to the spreadsheet file!) + def set(row, col, value, sheet = nil) #:nodoc: + key = normalize(row, col) + cell_type = cell_type_by_value(value) + sheet_for(sheet).cells[key] = Cell.new(value, cell_type, nil, cell_type, value, nil, nil, nil, Cell::Coordinate.new(row, col)) end - def comments - @comments.comments + # Returns the formula at (row,col). + # Returns nil if there is no formula. + # The method #formula? checks if there is a formula. + def formula(row, col, sheet = nil) + key = normalize(row, col) + safe_send(sheet_for(sheet).cells[key], :formula) end - def dimensions - @sheet.dimensions + # Predicate methods really should return a boolean + # value. Hopefully no one was relying on the fact that this + # previously returned either nil/formula + def formula?(*args) + !!formula(*args) end - private - - # Take an xml row and return an array of Excelx::Cell objects - # optionally pad array to header width(assumed 1st row). - # takes option pad_cells (boolean) defaults false - def cells_for_row_element(row_element, options = {}) - return [] unless row_element - cell_col = 0 - cells = [] - @sheet.each_cell(row_element) do |cell| - cells.concat(pad_cells(cell, cell_col)) if options[:pad_cells] - cells << cell - cell_col = cell.coordinate.column + # returns each formula in the selected sheet as an array of tuples in following format + # [[row, col, formula], [row, col, formula],...] 
+ def formulas(sheet = nil) + sheet_for(sheet).cells.select { |_, cell| cell.formula }.map do |(x, y), cell| + [x, y, cell.formula] end - cells end - def pad_cells(cell, last_column) - pad = [] - (cell.coordinate.column - 1 - last_column).times { pad << nil } - pad + # Given a cell, return the cell's style + def font(row, col, sheet = nil) + key = normalize(row, col) + definition_index = safe_send(sheet_for(sheet).cells[key], :style) + styles.definitions[definition_index] if definition_index end - end - ExceedsMaxError = Class.new(StandardError) - - # initialization and opening of a spreadsheet file - # values for packed: :zip - # optional cell_max (int) parameter for early aborting attempts to parse - # enormous documents. - def initialize(filename_or_stream, options = {}) - packed = options[:packed] - file_warning = options.fetch(:file_warning, :error) - cell_max = options.delete(:cell_max) - sheet_options = {} - sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false) - - unless is_stream?(filename_or_stream) - file_type_check(filename_or_stream,'.xlsx','an Excel-xlsx', file_warning, packed) - basename = File.basename(filename_or_stream) - end - - @tmpdir = make_tmpdir(basename, options[:tmpdir_root]) - @filename = local_filename(filename_or_stream, @tmpdir, packed) - @comments_files = [] - @rels_files = [] - process_zipfile(@filename || filename_or_stream) - - @sheet_names = workbook.sheets.map do |sheet| - unless options[:only_visible_sheets] && sheet['state'] == 'hidden' - sheet['name'] - end - end.compact - @sheets = [] - @sheets_by_name = Hash[@sheet_names.map.with_index do |sheet_name, n| - @sheets[n] = Sheet.new(sheet_name, @rels_files[n], @sheet_files[n], @comments_files[n], styles, shared_strings, workbook, sheet_options) - [sheet_name, @sheets[n]] - end] - - if cell_max - cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions) - raise ExceedsMaxError.new("Excel file exceeds cell maximum: 
#{cell_count} > #{cell_max}") if cell_count > cell_max - end - - super - rescue => e # clean up any temp files, but only if an error was raised - close - raise e - end - - def method_missing(method,*args) - if label = workbook.defined_names[method.to_s] - safe_send(sheet_for(label.sheet).cells[label.key], :value) - else - # call super for methods like #a1 - super + # returns the type of a cell: + # * :float + # * :string, + # * :date + # * :percentage + # * :formula + # * :time + # * :datetime + def celltype(row, col, sheet = nil) + key = normalize(row, col) + safe_send(sheet_for(sheet).cells[key], :type) end - end - def sheets - @sheet_names - end - - def sheet_for(sheet) - sheet ||= default_sheet - validate_sheet!(sheet) - @sheets_by_name[sheet] - end - - # Returns the content of a spreadsheet-cell. - # (1,1) is the upper left corner. - # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the - # cell at the first line and first row. - def cell(row, col, sheet=nil) - key = normalize(row,col) - safe_send(sheet_for(sheet).cells[key], :value) - end - - def row(rownumber,sheet=nil) - sheet_for(sheet).row(rownumber) - end - - # returns all values in this column as an array - # column numbers are 1,2,3,... 
like in the spreadsheet - def column(column_number,sheet=nil) - if column_number.is_a?(::String) - column_number = ::Roo::Utils.letter_to_number(column_number) + # returns the internal type of an excel cell + # * :numeric_or_formula + # * :string + # Note: this is only available within the Excelx class + def excelx_type(row, col, sheet = nil) + key = normalize(row, col) + safe_send(sheet_for(sheet).cells[key], :excelx_type) end - sheet_for(sheet).column(column_number) - end - - # returns the number of the first non-empty row - def first_row(sheet=nil) - sheet_for(sheet).first_row - end - - # returns the number of the last non-empty row - def last_row(sheet=nil) - sheet_for(sheet).last_row - end - - # returns the number of the first non-empty column - def first_column(sheet=nil) - sheet_for(sheet).first_column - end - # returns the number of the last non-empty column - def last_column(sheet=nil) - sheet_for(sheet).last_column - end - - # set a cell to a certain value - # (this will not be saved back to the spreadsheet file!) - def set(row,col,value, sheet = nil) #:nodoc: - key = normalize(row,col) - cell_type = cell_type_by_value(value) - sheet_for(sheet).cells[key] = Cell.new(value, cell_type, nil, cell_type, value, nil, nil, nil, Cell::Coordinate.new(row, col)) - end - - - # Returns the formula at (row,col). - # Returns nil if there is no formula. - # The method #formula? checks if there is a formula. - def formula(row,col,sheet=nil) - key = normalize(row,col) - safe_send(sheet_for(sheet).cells[key], :formula) - end - - # Predicate methods really should return a boolean - # value. Hopefully no one was relying on the fact that this - # previously returned either nil/formula - def formula?(*args) - !!formula(*args) - end - - # returns each formula in the selected sheet as an array of tuples in following format - # [[row, col, formula], [row, col, formula],...] 
- def formulas(sheet=nil) - sheet_for(sheet).cells.select {|_, cell| cell.formula }.map do |(x, y), cell| - [x, y, cell.formula] + # returns the internal value of an excelx cell + # Note: this is only available within the Excelx class + def excelx_value(row, col, sheet = nil) + key = normalize(row, col) + safe_send(sheet_for(sheet).cells[key], :excelx_value) end - end - - # Given a cell, return the cell's style - def font(row, col, sheet=nil) - key = normalize(row,col) - definition_index = safe_send(sheet_for(sheet).cells[key], :style) - styles.definitions[definition_index] if definition_index - end - # returns the type of a cell: - # * :float - # * :string, - # * :date - # * :percentage - # * :formula - # * :time - # * :datetime - def celltype(row,col,sheet=nil) - key = normalize(row, col) - safe_send(sheet_for(sheet).cells[key], :type) - end - - # returns the internal type of an excel cell - # * :numeric_or_formula - # * :string - # Note: this is only available within the Excelx class - def excelx_type(row,col,sheet=nil) - key = normalize(row,col) - safe_send(sheet_for(sheet).cells[key], :excelx_type) - end + # returns the internal format of an excel cell + def excelx_format(row, col, sheet = nil) + key = normalize(row, col) + sheet_for(sheet).excelx_format(key) + end - # returns the internal value of an excelx cell - # Note: this is only available within the Excelx class - def excelx_value(row,col,sheet=nil) - key = normalize(row,col) - safe_send(sheet_for(sheet).cells[key], :excelx_value) - end + def empty?(row, col, sheet = nil) + sheet = sheet_for(sheet) + key = normalize(row, col) + cell = sheet.cells[key] + !cell || !cell.value || (cell.type == :string && cell.value.empty?) 
\ + || (row < sheet.first_row || row > sheet.last_row || col < sheet.first_column || col > sheet.last_column) + end - # returns the internal format of an excel cell - def excelx_format(row,col,sheet=nil) - key = normalize(row,col) - sheet_for(sheet).excelx_format(key) - end + # shows the internal representation of all cells + # for debugging purposes + def to_s(sheet = nil) + sheet_for(sheet).cells.inspect + end - def empty?(row,col,sheet=nil) - sheet = sheet_for(sheet) - key = normalize(row,col) - cell = sheet.cells[key] - !cell || !cell.value || (cell.type == :string && cell.value.empty?) \ - || (row < sheet.first_row || row > sheet.last_row || col < sheet.first_column || col > sheet.last_column) - end + # returns the row,col values of the labelled cell + # (nil,nil) if label is not defined + def label(name) + labels = workbook.defined_names + return [nil, nil, nil] if labels.empty? || !labels.key?(name) - # shows the internal representation of all cells - # for debugging purposes - def to_s(sheet=nil) - sheet_for(sheet).cells.inspect - end + [labels[name].row, labels[name].col, labels[name].sheet] + end - # returns the row,col values of the labelled cell - # (nil,nil) if label is not defined - def label(name) - labels = workbook.defined_names - if labels.empty? || !labels.key?(name) - [nil,nil,nil] - else - [labels[name].row, - labels[name].col, - labels[name].sheet] + # Returns an array which all labels. Each element is an array with + # [labelname, [row,col,sheetname]] + def labels + @labels ||= workbook.defined_names.map do |name, label| + [ + name, + [label.row, label.col, label.sheet] + ] + end end - end - # Returns an array which all labels. 
Each element is an array with - # [labelname, [row,col,sheetname]] - def labels - @labels ||= workbook.defined_names.map do |name, label| - [ name, - [ label.row, - label.col, - label.sheet, - ] ] + def hyperlink?(row, col, sheet = nil) + !!hyperlink(row, col, sheet) end - end - def hyperlink?(row,col,sheet=nil) - !!hyperlink(row, col, sheet) - end + # returns the hyperlink at (row/col) + # nil if there is no hyperlink + def hyperlink(row, col, sheet = nil) + key = normalize(row, col) + sheet_for(sheet).hyperlinks[key] + end - # returns the hyperlink at (row/col) - # nil if there is no hyperlink - def hyperlink(row,col,sheet=nil) - key = normalize(row,col) - sheet_for(sheet).hyperlinks[key] - end + # returns the comment at (row/col) + # nil if there is no comment + def comment(row, col, sheet = nil) + key = normalize(row, col) + sheet_for(sheet).comments[key] + end - # returns the comment at (row/col) - # nil if there is no comment - def comment(row,col,sheet=nil) - key = normalize(row,col) - sheet_for(sheet).comments[key] - end + # true, if there is a comment + def comment?(row, col, sheet = nil) + !!comment(row, col, sheet) + end - # true, if there is a comment - def comment?(row,col,sheet=nil) - !!comment(row,col,sheet) - end + def comments(sheet = nil) + sheet_for(sheet).comments.map do |(x, y), comment| + [x, y, comment] + end + end - def comments(sheet=nil) - sheet_for(sheet).comments.map do |(x, y), comment| - [x, y, comment] + # Yield an array of Excelx::Cell + # Takes options for sheet, pad_cells, and max_rows + def each_row_streaming(options = {}) + sheet_for(options.delete(:sheet)).each_row(options) { |row| yield row } end - end - # Yield an array of Excelx::Cell - # Takes options for sheet, pad_cells, and max_rows - def each_row_streaming(options={}) - sheet_for(options.delete(:sheet)).each_row(options) { |row| yield row } - end + private - private + def clean_sheet(sheet) + @sheets_by_name[sheet].cells.each_pair do |coord, value| + next unless 
value.value.is_a?(::String) - def clean_sheet(sheet) - @sheets_by_name[sheet].cells.each_pair do |coord, value| - next unless value.value.is_a?(::String) + @sheets_by_name[sheet].cells[coord].value = sanitize_value(value.value) + end - @sheets_by_name[sheet].cells[coord].value = sanitize_value(value.value) + @cleaned[sheet] = true end - @cleaned[sheet] = true - end - - # Internal: extracts the worksheet_ids from the workbook.xml file. xlsx - # documents require a workbook.xml file, so a if the file is missing - # it is not a valid xlsx file. In these cases, an ArgumentError is - # raised. - # - # wb - a Zip::Entry for the workbook.xml file. - # path - A String for Zip::Entry's destination path. - # - # Examples - # - # extract_worksheet_ids(, 'tmpdir/roo_workbook.xml') - # # => ["rId1", "rId2", "rId3"] - # - # Returns an Array of Strings. - def extract_worksheet_ids(entries, path) + # Internal: extracts the worksheet_ids from the workbook.xml file. xlsx + # documents require a workbook.xml file, so a if the file is missing + # it is not a valid xlsx file. In these cases, an ArgumentError is + # raised. + # + # wb - a Zip::Entry for the workbook.xml file. + # path - A String for Zip::Entry's destination path. + # + # Examples + # + # extract_worksheet_ids(, 'tmpdir/roo_workbook.xml') + # # => ["rId1", "rId2", "rId3"] + # + # Returns an Array of Strings. + def extract_worksheet_ids(entries, path) wb = entries.find { |e| e.name[/workbook.xml$/] } fail ArgumentError 'missing required workbook file' if wb.nil? wb.extract(path) workbook_doc = Roo::Utils.load_xml(path).remove_namespaces! - workbook_doc.xpath('//sheet').map{ |s| s.attributes['id'].value } - end + workbook_doc.xpath('//sheet').map { |s| s.attributes['id'].value } + end + + # Internal + # + # wb_rels - A Zip::Entry for the workbook.xml.rels file. + # path - A String for the Zip::Entry's destination path. 
+ # + # Examples + # + # extract_worksheets(, 'tmpdir/roo_workbook.xml.rels') + # # => { + # "rId1"=>"worksheets/sheet1.xml", + # "rId2"=>"worksheets/sheet2.xml", + # "rId3"=>"worksheets/sheet3.xml" + # } + # + # Returns a Hash. + def extract_worksheet_rels(entries, path) + wb_rels = entries.find { |e| e.name[/workbook.xml.rels$/] } + fail ArgumentError 'missing required workbook file' if wb_rels.nil? + + wb_rels.extract(path) + rels_doc = Roo::Utils.load_xml(path).remove_namespaces! + worksheet_type = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet' - # Internal - # - # wb_rels - A Zip::Entry for the workbook.xml.rels file. - # path - A String for the Zip::Entry's destination path. - # - # Examples - # - # extract_worksheets(, 'tmpdir/roo_workbook.xml.rels') - # # => { - # "rId1"=>"worksheets/sheet1.xml", - # "rId2"=>"worksheets/sheet2.xml", - # "rId3"=>"worksheets/sheet3.xml" - # } - # - # Returns a Hash. - def extract_worksheet_rels(entries, path) - wb_rels = entries.find { |e| e.name[/workbook.xml.rels$/] } - fail ArgumentError 'missing required workbook file' if wb_rels.nil? - - wb_rels.extract(path) - rels_doc = Roo::Utils.load_xml(path).remove_namespaces! 
- worksheet_type ='http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet' - - relationships = rels_doc.xpath('//Relationship').select do |relationship| - relationship.attributes['Type'].value == worksheet_type - end - - relationships.inject({}) do |hash, relationship| - attributes = relationship.attributes - id = attributes['Id']; - hash[id.value] = attributes['Target'].value - hash + relationships = rels_doc.xpath('//Relationship').select do |relationship| + relationship.attributes['Type'].value == worksheet_type + end + + relationships.inject({}) do |hash, relationship| + attributes = relationship.attributes + id = attributes['Id'] + hash[id.value] = attributes['Target'].value + hash + end end - end - def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir) - sheet_ids.each_with_index do |id, i| - name = sheets[id] - entry = entries.find { |entry| entry.name =~ /#{name}$/ } - path = "#{tmpdir}/roo_sheet#{i + 1}" - @sheet_files << path - entry.extract(path) + def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir) + sheet_ids.each_with_index do |id, i| + name = sheets[id] + entry = entries.find { |e| e.name =~ /#{name}$/ } + path = "#{tmpdir}/roo_sheet#{i + 1}" + @sheet_files << path + entry.extract(path) + end end - end - # Extracts all needed files from the zip file - def process_zipfile(zipfilename_or_stream) - @sheet_files = [] - - unless is_stream?(zipfilename_or_stream) - process_zipfile_entries Zip::File.open(zipfilename_or_stream).to_a.sort_by(&:name) - else - stream = Zip::InputStream.open zipfilename_or_stream - begin - entries = [] - while entry = stream.get_next_entry - entries << entry + # Extracts all needed files from the zip file + def process_zipfile(zipfilename_or_stream) + @sheet_files = [] + + unless is_stream?(zipfilename_or_stream) + process_zipfile_entries Zip::File.open(zipfilename_or_stream).to_a.sort_by(&:name) + else + stream = Zip::InputStream.open zipfilename_or_stream + begin + entries = [] + 
while (entry = stream.get_next_entry) + entries << entry + end + process_zipfile_entries entries + ensure + stream.close end - process_zipfile_entries entries - ensure - stream.close end end - end - def process_zipfile_entries entries - # NOTE: When Google or Numbers 3.1 exports to xlsx, the worksheet filenames - # are not in order. With Numbers 3.1, the first sheet is always - # sheet.xml, not sheet1.xml. With Google, the order of the worksheets is - # independent of a worksheet's filename (i.e. sheet6.xml can be the - # first worksheet). - # - # workbook.xml lists the correct order of worksheets and - # workbook.xml.rels lists the filenames for those worksheets. - # - # workbook.xml: - # - # - # workbook.xml.rel: - # - # - sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml") - sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels") - extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir) - - entries.each do |entry| - path = - case entry.name.downcase - when /sharedstrings.xml$/ - "#{@tmpdir}/roo_sharedStrings.xml" - when /styles.xml$/ - "#{@tmpdir}/roo_styles.xml" - when /comments([0-9]+).xml$/ - # FIXME: Most of the time, The order of the comment files are the same - # the sheet order, i.e. sheet1.xml's comments are in comments1.xml. - # In some situations, this isn't true. The true location of a - # sheet's comment file is in the sheet1.xml.rels file. SEE - # ECMA-376 12.3.3 in "Ecma Office Open XML Part 1". - nr = Regexp.last_match[1].to_i - @comments_files[nr - 1] = "#{@tmpdir}/roo_comments#{nr}" - when /sheet([0-9]+).xml.rels$/ - # FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but - # it also stores the location for sharedStrings, comments, - # drawings, etc. - nr = Regexp.last_match[1].to_i - @rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}" - end + def process_zipfile_entries(entries) + # NOTE: When Google or Numbers 3.1 exports to xlsx, the worksheet filenames + # are not in order. 
With Numbers 3.1, the first sheet is always + # sheet.xml, not sheet1.xml. With Google, the order of the worksheets is + # independent of a worksheet's filename (i.e. sheet6.xml can be the + # first worksheet). + # + # workbook.xml lists the correct order of worksheets and + # workbook.xml.rels lists the filenames for those worksheets. + # + # workbook.xml: + # + # + # workbook.xml.rel: + # + # + sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml") + sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels") + extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir) + + entries.each do |entry| + path = + case entry.name.downcase + when /sharedstrings.xml$/ + "#{@tmpdir}/roo_sharedStrings.xml" + when /styles.xml$/ + "#{@tmpdir}/roo_styles.xml" + when /comments([0-9]+).xml$/ + # FIXME: Most of the time, The order of the comment files are the same + # the sheet order, i.e. sheet1.xml's comments are in comments1.xml. + # In some situations, this isn't true. The true location of a + # sheet's comment file is in the sheet1.xml.rels file. SEE + # ECMA-376 12.3.3 in "Ecma Office Open XML Part 1". + nr = Regexp.last_match[1].to_i + @comments_files[nr - 1] = "#{@tmpdir}/roo_comments#{nr}" + when /sheet([0-9]+).xml.rels$/ + # FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but + # it also stores the location for sharedStrings, comments, + # drawings, etc. 
+ nr = Regexp.last_match[1].to_i + @rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}" + end - entry.extract(path) if path + entry.extract(path) if path + end end - end - def styles - @styles ||= Styles.new(File.join(@tmpdir, 'roo_styles.xml')) - end + def styles + @styles ||= Styles.new(File.join(@tmpdir, 'roo_styles.xml')) + end - def shared_strings - @shared_strings ||= SharedStrings.new(File.join(@tmpdir, 'roo_sharedStrings.xml')) - end + def shared_strings + @shared_strings ||= SharedStrings.new(File.join(@tmpdir, 'roo_sharedStrings.xml')) + end - def workbook - @workbook ||= Workbook.new(File.join(@tmpdir, "roo_workbook.xml")) - end + def workbook + @workbook ||= Workbook.new(File.join(@tmpdir, 'roo_workbook.xml')) + end - def safe_send(object, method, *args) - object.send(method, *args) if object && object.respond_to?(method) + def safe_send(object, method, *args) + object.send(method, *args) if object && object.respond_to?(method) + end end end diff --git a/lib/roo/excelx/cell.rb b/lib/roo/excelx/cell.rb new file mode 100644 index 00000000..875d4b45 --- /dev/null +++ b/lib/roo/excelx/cell.rb @@ -0,0 +1,77 @@ +require 'date' + +module Roo + class Excelx + class Cell + attr_reader :type, :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate + attr_writer :value + + def initialize(value, type, formula, excelx_type, excelx_value, style, hyperlink, base_date, coordinate) + @type = type + @formula = formula + @base_date = base_date if [:date, :datetime].include?(@type) + @excelx_type = excelx_type + @excelx_value = excelx_value + @style = style + @value = type_cast_value(value) + @value = Roo::Link.new(hyperlink, @value.to_s) if hyperlink + @coordinate = coordinate + end + + def type + case + when @formula + :formula + when @value.is_a?(Roo::Link) + :link + else + @type + end + end + + class Coordinate + attr_accessor :row, :column + + def initialize(row, column) + @row, @column = row, column + end + end + + private + + def 
type_cast_value(value) + case @type + when :float, :percentage + value.to_f + when :date + create_date(@base_date + value.to_i) + when :datetime + create_datetime(@base_date + value.to_f.round(6)) + when :time + value.to_f * 86_400 + else + value + end + end + + def create_date(date) + yyyy, mm, dd = date.strftime('%Y-%m-%d').split('-') + + Date.new(yyyy.to_i, mm.to_i, dd.to_i) + end + + def create_datetime(date) + datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N') + t = round_datetime(datetime_string) + + DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec) + end + + def round_datetime(datetime_string) + /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+.\d+)/ =~ datetime_string
+ + Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0) + end + end + end +end diff --git a/lib/roo/excelx/comments.rb b/lib/roo/excelx/comments.rb index cdec683d..af46d495 100644 --- a/lib/roo/excelx/comments.rb +++ b/lib/roo/excelx/comments.rb @@ -1,24 +1,22 @@ require 'roo/excelx/extractor' module Roo - class Excelx::Comments < Excelx::Extractor + class Excelx + class Comments < Excelx::Extractor + def comments + @comments ||= extract_comments + end - def comments - @comments ||= extract_comments - end + private - private + def extract_comments + return {} unless doc_exists? - def extract_comments - if doc_exists? - Hash[doc.xpath("//comments/commentList/comment").map do |comment| + Hash[doc.xpath('//comments/commentList/comment').map do |comment| value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value] end] - else - {} end end - end end diff --git a/lib/roo/excelx/extractor.rb b/lib/roo/excelx/extractor.rb index ab5c2603..1c9f97ea 100644 --- a/lib/roo/excelx/extractor.rb +++ b/lib/roo/excelx/extractor.rb @@ -1,20 +1,22 @@ module Roo - class Excelx::Extractor - def initialize(path) - @path = path - end + class Excelx + class Extractor + def initialize(path) + @path = path + end - private + private - def doc - @doc ||= + def doc + @doc ||= if doc_exists? ::Roo::Utils.load_xml(@path).remove_namespaces! end - end + end - def doc_exists? - @path && File.exist?(@path) + def doc_exists? 
+ @path && File.exist?(@path) + end end end end diff --git a/lib/roo/excelx/relationships.rb b/lib/roo/excelx/relationships.rb index f07fed21..8a0ed97f 100644 --- a/lib/roo/excelx/relationships.rb +++ b/lib/roo/excelx/relationships.rb @@ -1,26 +1,25 @@ require 'roo/excelx/extractor' module Roo - class Excelx::Relationships < Excelx::Extractor - def [](index) - to_a[index] - end + class Excelx + class Relationships < Excelx::Extractor + def [](index) + to_a[index] + end - def to_a - @relationships ||= extract_relationships - end + def to_a + @relationships ||= extract_relationships + end + + private - private + def extract_relationships + return [] unless doc_exists? - def extract_relationships - if doc_exists? - Hash[doc.xpath("/Relationships/Relationship").map do |rel| + Hash[doc.xpath('/Relationships/Relationship').map do |rel| [rel.attribute('Id').text, rel] end] - else - [] end end - end end diff --git a/lib/roo/excelx/shared_strings.rb b/lib/roo/excelx/shared_strings.rb index dbfc99d6..c2fd5ebe 100644 --- a/lib/roo/excelx/shared_strings.rb +++ b/lib/roo/excelx/shared_strings.rb @@ -1,40 +1,37 @@ require 'roo/excelx/extractor' module Roo - class Excelx::SharedStrings < Excelx::Extractor - def [](index) - to_a[index] - end + class Excelx + class SharedStrings < Excelx::Extractor + def [](index) + to_a[index] + end - def to_a - @array ||= extract_shared_strings - end + def to_a + @array ||= extract_shared_strings + end + + private - private + def extract_shared_strings + return [] unless doc_exists? - def extract_shared_strings - if doc_exists? 
# read the shared strings xml document - doc.xpath("/sst/si").map do |si| + doc.xpath('/sst/si').map do |si| shared_string = '' si.children.each do |elem| case elem.name - when 'r' - elem.children.each do |r_elem| - if r_elem.name == 't' - shared_string << r_elem.content - end - end - when 't' - shared_string = elem.content + when 'r' + elem.children.each do |r_elem| + shared_string << r_elem.content if r_elem.name == 't' + end + when 't' + shared_string = elem.content end end shared_string end - else - [] end end - end end diff --git a/lib/roo/excelx/sheet.rb b/lib/roo/excelx/sheet.rb new file mode 100644 index 00000000..be78983c --- /dev/null +++ b/lib/roo/excelx/sheet.rb @@ -0,0 +1,107 @@ +module Roo + class Excelx + class Sheet + def initialize(name, rels_path, sheet_path, comments_path, styles, shared_strings, workbook, options = {}) + @name = name + @rels = Relationships.new(rels_path) + @comments = Comments.new(comments_path) + @styles = styles + @sheet = SheetDoc.new(sheet_path, @rels, @styles, shared_strings, workbook, options) + end + + def cells + @cells ||= @sheet.cells(@rels) + end + + def present_cells + @present_cells ||= cells.select { |_, cell| cell && cell.value } + end + + # Yield each row as array of Excelx::Cell objects + # accepts options max_rows (int) (offset by 1 for header), + # pad_cells (boolean) and offset (int) + def each_row(options = {}, &block) + row_count = 0 + options[:offset] ||= 0 + @sheet.each_row_streaming do |row| + break if options[:max_rows] && row_count == options[:max_rows] + options[:offset] + 1 + if block_given? 
&& !(options[:offset] && row_count < options[:offset]) + block.call(cells_for_row_element(row, options)) + end + row_count += 1 + end + end + + def row(row_number) + first_column.upto(last_column).map do |col| + cells[[row_number, col]] + end.map { |cell| cell && cell.value } + end + + def column(col_number) + first_row.upto(last_row).map do |row| + cells[[row, col_number]] + end.map { |cell| cell && cell.value } + end + + # returns the number of the first non-empty row + def first_row + @first_row ||= present_cells.keys.map { |row, _| row }.min + end + + def last_row + @last_row ||= present_cells.keys.map { |row, _| row }.max + end + + # returns the number of the first non-empty column + def first_column + @first_column ||= present_cells.keys.map { |_, col| col }.min + end + + # returns the number of the last non-empty column + def last_column + @last_column ||= present_cells.keys.map { |_, col| col }.max + end + + def excelx_format(key) + cell = cells[key] + @styles.style_format(cell.style).to_s if cell + end + + def hyperlinks + @hyperlinks ||= @sheet.hyperlinks(@rels) + end + + def comments + @comments.comments + end + + def dimensions + @sheet.dimensions + end + + private + + # Take an xml row and return an array of Excelx::Cell objects + # optionally pad array to header width(assumed 1st row). 
+ # takes option pad_cells (boolean) defaults false + def cells_for_row_element(row_element, options = {}) + return [] unless row_element + cell_col = 0 + cells = [] + @sheet.each_cell(row_element) do |cell| + cells.concat(pad_cells(cell, cell_col)) if options[:pad_cells] + cells << cell + cell_col = cell.coordinate.column + end + cells + end + + def pad_cells(cell, last_column) + pad = [] + (cell.coordinate.column - 1 - last_column).times { pad << nil } + pad + end + end + end +end diff --git a/lib/roo/excelx/sheet_doc.rb b/lib/roo/excelx/sheet_doc.rb index e3047131..092e8b7f 100644 --- a/lib/roo/excelx/sheet_doc.rb +++ b/lib/roo/excelx/sheet_doc.rb @@ -1,96 +1,93 @@ require 'roo/excelx/extractor' module Roo - class Excelx::SheetDoc < Excelx::Extractor - def initialize(path, relationships, styles, shared_strings, workbook, options = {}) - super(path) - @options = options - @relationships = relationships - @styles = styles - @shared_strings = shared_strings - @workbook = workbook - end + class Excelx + class SheetDoc < Excelx::Extractor + def initialize(path, relationships, styles, shared_strings, workbook, options = {}) + super(path) + @options = options + @relationships = relationships + @styles = styles + @shared_strings = shared_strings + @workbook = workbook + end - def cells(relationships) - @cells ||= extract_cells(relationships) - end + def cells(relationships) + @cells ||= extract_cells(relationships) + end - def hyperlinks(relationships) - @hyperlinks ||= extract_hyperlinks(relationships) - end + def hyperlinks(relationships) + @hyperlinks ||= extract_hyperlinks(relationships) + end - # Get the dimensions for the sheet. - # This is the upper bound of cells that might - # be parsed. (the document may be sparse so cell count is only upper bound) - def dimensions - @dimensions ||= extract_dimensions - end + # Get the dimensions for the sheet. + # This is the upper bound of cells that might + # be parsed. 
(the document may be sparse so cell count is only upper bound) + def dimensions + @dimensions ||= extract_dimensions + end - # Yield each row xml element to caller - def each_row_streaming(&block) - Roo::Utils.each_element(@path, 'row', &block) - end + # Yield each row xml element to caller + def each_row_streaming(&block) + Roo::Utils.each_element(@path, 'row', &block) + end - # Yield each cell as Excelx::Cell to caller for given - # row xml - def each_cell(row_xml) - return [] unless row_xml - row_xml.children.each do |cell_element| - key = ::Roo::Utils.ref_to_key(cell_element['r']) - yield cell_from_xml(cell_element, hyperlinks(@relationships)[key]) + # Yield each cell as Excelx::Cell to caller for given + # row xml + def each_cell(row_xml) + return [] unless row_xml + row_xml.children.each do |cell_element| + key = ::Roo::Utils.ref_to_key(cell_element['r']) + yield cell_from_xml(cell_element, hyperlinks(@relationships)[key]) + end end - end - private + private - def cell_from_xml(cell_xml, hyperlink) - # This is error prone, to_i will silently turn a nil into a 0 - # and it works by coincidence that Format[0] is general - style = cell_xml['s'].to_i # should be here - # c: - # 22606 - # , format: , tmp_type: float - value_type = + def cell_from_xml(cell_xml, hyperlink) + # This is error prone, to_i will silently turn a nil into a 0 + # and it works by coincidence that Format[0] is general + style = cell_xml['s'].to_i # should be here + # c: + # 22606 + # , format: , tmp_type: float + value_type = case cell_xml['t'] when 's' :shared when 'b' :boolean - # 2011-02-25 BEGIN when 'str' :string - # 2011-02-25 END - # 2011-09-15 BEGIN when 'inlineStr' :inlinestr - # 2011-09-15 END else format = @styles.style_format(style) Excelx::Format.to_type(format) end - formula = nil - row, column = ::Roo::Utils.split_coordinate(cell_xml['r']) - cell_xml.children.each do |cell| - case cell.name - when 'is' - cell.children.each do |inline_str| - if inline_str.name == 't' - return 
Excelx::Cell.new(inline_str.content,:string,formula,:string,inline_str.content,style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column)) + formula = nil + row, column = ::Roo::Utils.split_coordinate(cell_xml['r']) + cell_xml.children.each do |cell| + case cell.name + when 'is' + cell.children.each do |inline_str| + if inline_str.name == 't' + return Excelx::Cell.new(inline_str.content, :string, formula, :string, inline_str.content, style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column)) + end end - end - when 'f' - formula = cell.content - when 'v' - if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0 - value_type = + when 'f' + formula = cell.content + when 'v' + if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0 + value_type = if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001 :datetime else :date end - end - excelx_type = [:numeric_or_formula,format.to_s] - value = + end + excelx_type = [:numeric_or_formula, format.to_s] + value = case value_type when :shared value_type = :string @@ -109,54 +106,54 @@ def cell_from_xml(cell_xml, hyperlink) value_type = :float cell.content end - return Excelx::Cell.new(value,value_type,formula,excelx_type,cell.content,style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column)) + return Excelx::Cell.new(value, value_type, formula, excelx_type, cell.content, style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column)) + end end + Excelx::Cell.new(nil, nil, nil, nil, nil, nil, nil, nil, Excelx::Cell::Coordinate.new(row, column)) end - Excelx::Cell.new(nil, nil, nil, nil, nil, nil, nil, nil, Excelx::Cell::Coordinate.new(row, column)) - end - def extract_hyperlinks(relationships) - Hash[doc.xpath("/worksheet/hyperlinks/hyperlink").map do |hyperlink| - if hyperlink.attribute('id') && relationship = relationships[hyperlink.attribute('id').text] - 
[::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text] - end - end.compact] - end + def extract_hyperlinks(relationships) + Hash[doc.xpath('/worksheet/hyperlinks/hyperlink').map do |hyperlink| + if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text]) + [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text] + end + end.compact] + end - def expand_merged_ranges(cells) - # Extract merged ranges from xml - merges = {} - doc.xpath("/worksheet/mergeCells/mergeCell").each do |mergecell_xml| - tl, br = mergecell_xml['ref'].split(/:/).map {|ref| ::Roo::Utils.ref_to_key(ref)} - for row in tl[0]..br[0] do - for col in tl[1]..br[1] do - next if row == tl[0] && col == tl[1] - merges[[row,col]] = tl + def expand_merged_ranges(cells) + # Extract merged ranges from xml + merges = {} + doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml| + tl, br = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) } + for row in tl[0]..br[0] do + for col in tl[1]..br[1] do + next if row == tl[0] && col == tl[1] + merges[[row, col]] = tl + end end end + # Duplicate value into all cells in merged range + merges.each do |dst, src| + cells[dst] = cells[src] + end end - # Duplicate value into all cells in merged range - merges.each do |dst, src| - cells[dst] = cells[src] - end - end - def extract_cells(relationships) - extracted_cells = Hash[doc.xpath("/worksheet/sheetData/row/c").map do |cell_xml| - key = ::Roo::Utils.ref_to_key(cell_xml['r']) - [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])] - end] - if @options[:expand_merged_ranges] - expand_merged_ranges(extracted_cells) + def extract_cells(relationships) + extracted_cells = Hash[doc.xpath('/worksheet/sheetData/row/c').map do |cell_xml| + key = ::Roo::Utils.ref_to_key(cell_xml['r']) + [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])] + end] + + 
expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges] + + extracted_cells end - extracted_cells - end - def extract_dimensions - Roo::Utils.each_element(@path, 'dimension') do |dimension| - return dimension.attributes["ref"].value + def extract_dimensions + Roo::Utils.each_element(@path, 'dimension') do |dimension| + return dimension.attributes['ref'].value + end end - end =begin Datei xl/comments1.xml @@ -198,5 +195,6 @@ def extract_dimensions read_comments(sheet) end =end + end end end diff --git a/lib/roo/excelx/styles.rb b/lib/roo/excelx/styles.rb index bca92d31..877f6ea1 100644 --- a/lib/roo/excelx/styles.rb +++ b/lib/roo/excelx/styles.rb @@ -2,61 +2,63 @@ require 'roo/excelx/extractor' module Roo - class Excelx::Styles < Excelx::Extractor - # convert internal excelx attribute to a format - def style_format(style) - id = num_fmt_ids[style.to_i] - num_fmts[id] || Excelx::Format::STANDARD_FORMATS[id.to_i] - end + class Excelx + class Styles < Excelx::Extractor + # convert internal excelx attribute to a format + def style_format(style) + id = num_fmt_ids[style.to_i] + num_fmts[id] || Excelx::Format::STANDARD_FORMATS[id.to_i] + end - def definitions - @definitions ||= extract_definitions - end + def definitions + @definitions ||= extract_definitions + end - private + private - def num_fmt_ids - @num_fmt_ids ||= extract_num_fmt_ids - end + def num_fmt_ids + @num_fmt_ids ||= extract_num_fmt_ids + end - def num_fmts - @num_fmts ||= extract_num_fmts - end + def num_fmts + @num_fmts ||= extract_num_fmts + end - def fonts - @fonts ||= extract_fonts - end + def fonts + @fonts ||= extract_fonts + end - def extract_definitions - doc.xpath("//cellXfs").flat_map do |xfs| - xfs.children.map do |xf| - fonts[xf['fontId'].to_i] + def extract_definitions + doc.xpath('//cellXfs').flat_map do |xfs| + xfs.children.map do |xf| + fonts[xf['fontId'].to_i] + end end end - end - def extract_fonts - doc.xpath("//fonts/font").map do |font_el| - Font.new.tap do |font| - 
font.bold = !font_el.xpath('./b').empty? - font.italic = !font_el.xpath('./i').empty? - font.underline = !font_el.xpath('./u').empty? + def extract_fonts + doc.xpath('//fonts/font').map do |font_el| + Font.new.tap do |font| + font.bold = !font_el.xpath('./b').empty? + font.italic = !font_el.xpath('./i').empty? + font.underline = !font_el.xpath('./u').empty? + end end end - end - def extract_num_fmt_ids - doc.xpath("//cellXfs").flat_map do |xfs| - xfs.children.map do |xf| - xf['numFmtId'] + def extract_num_fmt_ids + doc.xpath('//cellXfs').flat_map do |xfs| + xfs.children.map do |xf| + xf['numFmtId'] + end end end - end - def extract_num_fmts - Hash[doc.xpath("//numFmt").map do |num_fmt| - [num_fmt['numFmtId'], num_fmt['formatCode']] - end] + def extract_num_fmts + Hash[doc.xpath('//numFmt').map do |num_fmt| + [num_fmt['numFmtId'], num_fmt['formatCode']] + end] + end end end end diff --git a/lib/roo/excelx/workbook.rb b/lib/roo/excelx/workbook.rb index 33cd4d06..7ef841f6 100644 --- a/lib/roo/excelx/workbook.rb +++ b/lib/roo/excelx/workbook.rb @@ -1,59 +1,59 @@ require 'roo/excelx/extractor' module Roo - class Excelx::Workbook < Excelx::Extractor - class Label - attr_reader :sheet, :row, :col, :name + class Excelx + class Workbook < Excelx::Extractor + class Label + attr_reader :sheet, :row, :col, :name - def initialize(name, sheet, row, col) - @name = name - @sheet = sheet - @row = row.to_i - @col = ::Roo::Utils.letter_to_number(col) - end + def initialize(name, sheet, row, col) + @name = name + @sheet = sheet + @row = row.to_i + @col = ::Roo::Utils.letter_to_number(col) + end - def key - [@row, @col] + def key + [@row, @col] + end end - end - def initialize(path) - super - if !doc_exists? - raise ArgumentError, 'missing required workbook file' + def initialize(path) + super + fail ArgumentError, 'missing required workbook file' unless doc_exists? 
end - end - def sheets - doc.xpath("//sheet") - end + def sheets + doc.xpath('//sheet') + end - # aka labels - def defined_names - Hash[doc.xpath("//definedName").map do |defined_name| - # "Sheet1!$C$5" - sheet, coordinates = defined_name.text.split('!$', 2) - col,row = coordinates.split('$') - name = defined_name['name'] - [name, Label.new(name, sheet,row,col)] - end] - end + # aka labels + def defined_names + Hash[doc.xpath('//definedName').map do |defined_name| + # "Sheet1!$C$5" + sheet, coordinates = defined_name.text.split('!$', 2) + col, row = coordinates.split('$') + name = defined_name['name'] + [name, Label.new(name, sheet, row, col)] + end] + end - def base_date - @base_date ||= + def base_date + @base_date ||= begin # Default to 1900 (minus one day due to excel quirk) but use 1904 if # it's set in the Workbook's workbookPr # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx - result = Date.new(1899,12,30) # default - doc.css("workbookPr[date1904]").each do |workbookPr| - if workbookPr["date1904"] =~ /true|1/i - result = Date.new(1904,01,01) + result = Date.new(1899, 12, 30) # default + doc.css('workbookPr[date1904]').each do |workbookPr| + if workbookPr['date1904'] =~ /true|1/i + result = Date.new(1904, 01, 01) break end end result end + end end end end