Skip to content

Commit

Permalink
Merge pull request #215 from stevendaniels/cleanup_excelx
Browse files Browse the repository at this point in the history
Cleanup Excelx
  • Loading branch information
stevendaniels committed May 28, 2015
2 parents fcc9a01 + 023bc4d commit 84982e0
Show file tree
Hide file tree
Showing 10 changed files with 802 additions and 799 deletions.
955 changes: 389 additions & 566 deletions lib/roo/excelx.rb

Large diffs are not rendered by default.

77 changes: 77 additions & 0 deletions lib/roo/excelx/cell.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
require 'date'

module Roo
class Excelx
# A single worksheet cell: the raw spreadsheet data it was built from plus the
# Ruby value derived from that data.
class Cell
  # `type` is intentionally absent from this list: it is defined explicitly
  # below, and declaring it twice triggers a method-redefinition warning.
  attr_reader :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate
  attr_writer :value

  # value        - raw cell content; cast according to +type+ (see #type_cast_value)
  # type         - Symbol such as :float, :percentage, :date, :datetime, :time
  # formula      - formula of the cell, or nil
  # excelx_type  - stored unchanged, exposed through #excelx_type
  # excelx_value - stored unchanged, exposed through #excelx_value
  # style        - stored unchanged, exposed through #style
  # hyperlink    - link target or nil; when given, the value is wrapped in a Roo::Link
  # base_date    - date that serial numbers are added to; only kept for :date/:datetime
  # coordinate   - stored unchanged, exposed through #coordinate
  def initialize(value, type, formula, excelx_type, excelx_value, style, hyperlink, base_date, coordinate)
    @type = type
    @formula = formula
    @base_date = base_date if [:date, :datetime].include?(@type)
    @excelx_type = excelx_type
    @excelx_value = excelx_value
    @style = style
    # Keep the raw link so that the public `hyperlink` reader actually returns it.
    @hyperlink = hyperlink
    @value = type_cast_value(value)
    @value = Roo::Link.new(hyperlink, @value.to_s) if hyperlink
    @coordinate = coordinate
  end

  # Effective type of the cell: :formula when a formula is present, :link when
  # the value is a Roo::Link, otherwise the type given to the constructor.
  def type
    case
    when @formula
      :formula
    when @value.is_a?(Roo::Link)
      :link
    else
      @type
    end
  end

  # Row/column pair locating a cell.
  class Coordinate
    attr_accessor :row, :column

    def initialize(row, column)
      @row, @column = row, column
    end
  end

  private

  # Converts the raw value into a Ruby object according to @type.
  def type_cast_value(value)
    case @type
    when :float, :percentage
      value.to_f
    when :date
      create_date(@base_date + value.to_i)
    when :datetime
      create_datetime(@base_date + value.to_f.round(6))
    when :time
      # The raw value is a fraction of a day; 86_400 is the number of seconds per day.
      value.to_f * 86_400
    else
      value
    end
  end

  # Returns a plain Date for the calendar day of +date+.
  def create_date(date)
    Date.new(date.year, date.month, date.day)
  end

  # Returns a DateTime for +date+, rounded to the nearest whole second.
  def create_datetime(date)
    datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N')
    t = round_datetime(datetime_string)

    DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
  end

  # Parses a 'YYYY-MM-DD HH:MM:SS.NNN' string and rounds it to whole seconds.
  #
  # The intermediate Time is built in UTC: the components carry no time zone,
  # and building it in the local zone would shift wall-clock times that fall
  # into a local daylight-saving gap (e.g. 02:30 on a spring-forward day).
  def round_datetime(datetime_string)
    /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+\.\d+)/ =~ datetime_string

    Time.utc(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0)
  end
end
end
end
20 changes: 9 additions & 11 deletions lib/roo/excelx/comments.rb
Original file line number Diff line number Diff line change
@@ -1,24 +1,22 @@
require 'roo/excelx/extractor'

# NOTE(review): this span is a rendered diff of lib/roo/excelx/comments.rb with
# its +/- markers and indentation stripped. The removed and the added version
# of several lines therefore appear back to back, and the text is not valid
# Ruby as shown.
module Roo
class Excelx::Comments < Excelx::Extractor
class Excelx
class Comments < Excelx::Extractor
# Returns the extracted comments; computed on first use and cached in @comments.
def comments
@comments ||= extract_comments
end

def comments
@comments ||= extract_comments
end
private

private
# Builds a Hash with one entry per //comments/commentList/comment node.
# The key is Roo::Utils.ref_to_key applied to the node's 'ref' attribute; the
# value is the text of ./text/r/t or, when that node is absent, of ./text/t.
# An empty Hash is returned when doc_exists? is false (one version via an early
# return, the other via if/else).
def extract_comments
return {} unless doc_exists?

def extract_comments
if doc_exists?
Hash[doc.xpath("//comments/commentList/comment").map do |comment|
Hash[doc.xpath('//comments/commentList/comment').map do |comment|
value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text
[::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value]
end]
else
{}
end
end

end
end
22 changes: 12 additions & 10 deletions lib/roo/excelx/extractor.rb
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
# NOTE(review): this span is a rendered diff of lib/roo/excelx/extractor.rb with
# its +/- markers and indentation stripped. The removed and the added version
# of several lines therefore appear back to back, and the text is not valid
# Ruby as shown.
module Roo
class Excelx::Extractor
# Stores the path of the XML file this extractor reads from.
def initialize(path)
@path = path
end
class Excelx
class Extractor
def initialize(path)
@path = path
end

private
private

# Loads the XML at @path through Roo::Utils.load_xml, strips its namespaces and
# caches the result in @doc. The load is skipped when doc_exists? is falsy.
def doc
@doc ||=
def doc
@doc ||=
if doc_exists?
::Roo::Utils.load_xml(@path).remove_namespaces!
end
end
end

# Truthy only when @path is set and a file exists at that path.
def doc_exists?
@path && File.exist?(@path)
def doc_exists?
@path && File.exist?(@path)
end
end
end
end
27 changes: 13 additions & 14 deletions lib/roo/excelx/relationships.rb
Original file line number Diff line number Diff line change
@@ -1,26 +1,25 @@
require 'roo/excelx/extractor'

# NOTE(review): this span is a rendered diff of lib/roo/excelx/relationships.rb
# with its +/- markers and indentation stripped. The removed and the added
# version of several lines therefore appear back to back, and the text is not
# valid Ruby as shown.
module Roo
class Excelx::Relationships < Excelx::Extractor
# Looks up +index+ in the collection returned by to_a.
def [](index)
to_a[index]
end
class Excelx
class Relationships < Excelx::Extractor
def [](index)
to_a[index]
end

# Returns the extracted relationships; computed on first use and cached in
# @relationships.
def to_a
@relationships ||= extract_relationships
end
def to_a
@relationships ||= extract_relationships
end

private

private
# Maps every /Relationships/Relationship node to a pair of the text of its
# 'Id' attribute and the node itself, and wraps the pairs in a Hash.
# NOTE(review): when doc_exists? is false an Array ([]) is returned, whereas the
# populated case returns a Hash -- confirm that callers cope with both.
def extract_relationships
return [] unless doc_exists?

def extract_relationships
if doc_exists?
Hash[doc.xpath("/Relationships/Relationship").map do |rel|
Hash[doc.xpath('/Relationships/Relationship').map do |rel|
[rel.attribute('Id').text, rel]
end]
else
[]
end
end

end
end
41 changes: 19 additions & 22 deletions lib/roo/excelx/shared_strings.rb
Original file line number Diff line number Diff line change
@@ -1,40 +1,37 @@
require 'roo/excelx/extractor'

# NOTE(review): this span is a rendered diff of lib/roo/excelx/shared_strings.rb
# with its +/- markers and indentation stripped. The removed and the added
# version of several lines therefore appear back to back, and the text is not
# valid Ruby as shown.
module Roo
class Excelx::SharedStrings < Excelx::Extractor
# Looks up +index+ in the array returned by to_a.
def [](index)
to_a[index]
end
class Excelx
class SharedStrings < Excelx::Extractor
def [](index)
to_a[index]
end

# Returns the extracted shared strings; computed on first use and cached in
# @array.
def to_a
@array ||= extract_shared_strings
end
def to_a
@array ||= extract_shared_strings
end

private

private
# Produces one string per /sst/si node, or [] when doc_exists? is false.
# For each child of the node: a 't' element replaces the string with its
# content, while an 'r' element appends the content of each of its own 't'
# children.
def extract_shared_strings
return [] unless doc_exists?

def extract_shared_strings
if doc_exists?
# read the shared strings xml document
doc.xpath("/sst/si").map do |si|
doc.xpath('/sst/si').map do |si|
shared_string = ''
si.children.each do |elem|
case elem.name
when 'r'
elem.children.each do |r_elem|
if r_elem.name == 't'
shared_string << r_elem.content
end
end
when 't'
shared_string = elem.content
when 'r'
elem.children.each do |r_elem|
shared_string << r_elem.content if r_elem.name == 't'
end
when 't'
shared_string = elem.content
end
end
shared_string
end
else
[]
end
end

end
end
107 changes: 107 additions & 0 deletions lib/roo/excelx/sheet.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
module Roo
class Excelx
# One worksheet of an Excelx workbook. Combines the sheet document with its
# relationships, comments and styles, and answers questions about its cells.
class Sheet
  def initialize(name, rels_path, sheet_path, comments_path, styles, shared_strings, workbook, options = {})
    @name = name
    @rels = Relationships.new(rels_path)
    @comments = Comments.new(comments_path)
    @styles = styles
    @sheet = SheetDoc.new(sheet_path, @rels, @styles, shared_strings, workbook, options)
  end

  # Cells of the sheet as returned by the sheet document; the lookups below
  # index the result with [row, column] pairs. Cached after the first call.
  def cells
    @cells ||= @sheet.cells(@rels)
  end

  # The subset of #cells that hold a cell with a truthy value.
  def present_cells
    @present_cells ||= cells.select { |_, cell| cell && cell.value }
  end

  # Yield each row as array of Excelx::Cell objects
  # accepts options max_rows (int) (offset by 1 for header),
  # pad_cells (boolean) and offset (int)
  #
  # The options hash is only read, never written to, so a hash owned by the
  # caller comes back unchanged.
  def each_row(options = {}, &block)
    offset = options[:offset] || 0
    max_rows = options[:max_rows]
    row_count = 0

    @sheet.each_row_streaming do |row|
      # The extra row accounts for the header mentioned above.
      break if max_rows && row_count == max_rows + offset + 1
      block.call(cells_for_row_element(row, options)) if block_given? && row_count >= offset
      row_count += 1
    end
  end

  # Values of the given row, from the first to the last non-empty column;
  # positions without a cell yield nil.
  def row(row_number)
    first_column.upto(last_column).map do |col|
      cell = cells[[row_number, col]]
      cell && cell.value
    end
  end

  # Values of the given column, from the first to the last non-empty row;
  # positions without a cell yield nil.
  def column(col_number)
    first_row.upto(last_row).map do |row|
      cell = cells[[row, col_number]]
      cell && cell.value
    end
  end

  # returns the number of the first non-empty row
  def first_row
    @first_row ||= present_cells.keys.map { |row, _| row }.min
  end

  # returns the number of the last non-empty row
  def last_row
    @last_row ||= present_cells.keys.map { |row, _| row }.max
  end

  # returns the number of the first non-empty column
  def first_column
    @first_column ||= present_cells.keys.map { |_, col| col }.min
  end

  # returns the number of the last non-empty column
  def last_column
    @last_column ||= present_cells.keys.map { |_, col| col }.max
  end

  # String form of the style format of the cell stored under +key+,
  # or nil when there is no such cell.
  def excelx_format(key)
    cell = cells[key]
    @styles.style_format(cell.style).to_s if cell
  end

  # Hyperlinks reported by the sheet document; cached after the first call.
  def hyperlinks
    @hyperlinks ||= @sheet.hyperlinks(@rels)
  end

  def comments
    @comments.comments
  end

  def dimensions
    @sheet.dimensions
  end

  private

  # Take an xml row and return an array of Excelx::Cell objects
  # optionally pad array to header width(assumed 1st row).
  # takes option pad_cells (boolean) defaults false
  def cells_for_row_element(row_element, options = {})
    return [] unless row_element

    previous_column = 0
    row_cells = []
    @sheet.each_cell(row_element) do |cell|
      row_cells.concat(pad_cells(cell, previous_column)) if options[:pad_cells]
      row_cells << cell
      previous_column = cell.coordinate.column
    end
    row_cells
  end

  # Array of nils, one for every column skipped between +last_column+ and the
  # column of +cell+; empty when no column was skipped.
  def pad_cells(cell, last_column)
    gap = cell.coordinate.column - 1 - last_column
    Array.new([gap, 0].max)
  end
end
end
end
Loading

0 comments on commit 84982e0

Please sign in to comment.