Skip to content

Commit

Permalink
Merge pull request #52 from krzysiek1507/feature/performance-fixes
Browse files Browse the repository at this point in the history
Performance and memory fixes
  • Loading branch information
liufengyun authored Apr 22, 2019
2 parents 93be507 + 6d028a6 commit 2040cc9
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 23 deletions.
33 changes: 20 additions & 13 deletions lib/hashdiff/diff.rb
Original file line number Diff line number Diff line change
Expand Up @@ -122,13 +122,15 @@ def self.diff(obj1, obj2, options = {}, &block)
elsif obj1.is_a?(Array) && !opts[:use_lcs]
result.concat(LinearCompareArray.call(obj1, obj2, opts))
elsif obj1.is_a?(Hash)
obj1_keys = obj1.keys
obj2_keys = obj2.keys

deleted_keys = obj1.keys - obj2.keys
common_keys = obj1.keys & obj2.keys
added_keys = obj2.keys - obj1.keys
deleted_keys = (obj1_keys - obj2_keys).sort_by(&:to_s)
common_keys = (obj1_keys & obj2_keys).sort_by(&:to_s)
added_keys = (obj2_keys - obj1_keys).sort_by(&:to_s)

# add deleted properties
deleted_keys.sort_by { |k, _v| k.to_s }.each do |k|
deleted_keys.each do |k|
change_key = prefix_append_key(opts[:prefix], k, opts)
custom_result = custom_compare(opts[:comparison], change_key, obj1[k], nil)

Expand All @@ -140,13 +142,13 @@ def self.diff(obj1, obj2, options = {}, &block)
end

# recursive comparison for common keys
common_keys.sort_by { |k, _v| k.to_s }.each do |k|
common_keys.each do |k|
prefix = prefix_append_key(opts[:prefix], k, opts)
result.concat(diff(obj1[k], obj2[k], opts.merge(prefix: prefix)))
end

# added properties
added_keys.sort_by { |k, _v| k.to_s }.each do |k|
added_keys.each do |k|
change_key = prefix_append_key(opts[:prefix], k, opts)
next if obj1.key?(k)

Expand All @@ -171,28 +173,33 @@ def self.diff(obj1, obj2, options = {}, &block)
#
# diff array using LCS algorithm
def self.diff_array_lcs(arraya, arrayb, options = {})
opts = {
prefix: '',
similarity: 0.8,
delimiter: '.'
}.merge!(options)
return [] if arraya.empty? && arrayb.empty?

change_set = []
return [] if arraya.empty? && arrayb.empty?

if arraya.empty?
arrayb.each_index do |index|
change_set << ['+', index, arrayb[index]]
end

return change_set
elsif arrayb.empty?
end

if arrayb.empty?
arraya.each_index do |index|
i = arraya.size - index - 1
change_set << ['-', i, arraya[i]]
end

return change_set
end

opts = {
prefix: '',
similarity: 0.8,
delimiter: '.'
}.merge!(options)

links = lcs(arraya, arrayb, opts)

# yield common
Expand Down
4 changes: 2 additions & 2 deletions lib/hashdiff/lcs.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ module HashDiff
# calculate array difference using LCS algorithm
# http://en.wikipedia.org/wiki/Longest_common_subsequence_problem
def self.lcs(arraya, arrayb, options = {})
return [] if arraya.empty? || arrayb.empty?

opts = { similarity: 0.8 }.merge!(options)

opts[:prefix] = prefix_append_array_index(opts[:prefix], '*', opts)

return [] if arraya.empty? || arrayb.empty?

a_start = b_start = 0
a_finish = arraya.size - 1
b_finish = arrayb.size - 1
Expand Down
15 changes: 7 additions & 8 deletions lib/hashdiff/util.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@ module HashDiff
# Decide whether two objects are "similar".
#
# When neither argument is an Array or Hash, the decision is delegated to
# compare_values. Otherwise the number of differences reported by diff
# (as tallied by count_diff) is weighed against the combined node count of
# both objects (as reported by count_nodes), and the resulting ratio is
# compared with the :similarity threshold.
#
# obja, objb - the objects to compare
# options    - Hash of options; :similarity (default 0.8) is the minimum
#              ratio required, and the merged options are passed on to diff
#
# Returns true or false.
def self.similar?(obja, objb, options = {})
  return compare_values(obja, objb, options) unless obja.is_a?(Array) || obja.is_a?(Hash) || objb.is_a?(Array) || objb.is_a?(Hash)

  count_a = count_nodes(obja)
  count_b = count_nodes(objb)
  total = count_a + count_b

  # Nothing to compare on either side: similar by definition. Returning here,
  # before options are merged or a diff is computed, avoids needless work and
  # a division by zero below.
  return true if total.zero?

  # Defaults are merged once, and the (potentially expensive) diff runs once.
  opts = { similarity: 0.8 }.merge!(options)
  diffs = count_diff diff(obja, objb, opts)

  (1 - diffs.to_f / total.to_f) >= opts[:similarity]
end

Expand Down Expand Up @@ -83,8 +84,8 @@ def self.node(hash, parts)
#
# check for equality or "closeness" within given tolerance
def self.compare_values(obj1, obj2, options = {})
if (options[:numeric_tolerance].is_a? Numeric) &&
[obj1, obj2].all? { |v| v.is_a? Numeric }
if options[:numeric_tolerance].is_a?(Numeric) &&
obj1.is_a?(Numeric) && obj2.is_a?(Numeric)
return (obj1 - obj2).abs <= options[:numeric_tolerance]
end

Expand All @@ -105,9 +106,7 @@ def self.compare_values(obj1, obj2, options = {})
#
# check if objects are comparable
def self.comparable?(obj1, obj2, strict = true)
[Array, Hash].each do |type|
return true if obj1.is_a?(type) && obj2.is_a?(type)
end
return true if (obj1.is_a?(Array) || obj1.is_a?(Hash)) && obj2.is_a?(obj1.class)
return true if !strict && obj1.is_a?(Numeric) && obj2.is_a?(Numeric)

obj1.is_a?(obj2.class) && obj2.is_a?(obj1.class)
Expand Down
8 changes: 8 additions & 0 deletions spec/hash_diff/util_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@
described_class.similar?(a, b, similarity: 1).should be false
end

# Two empty hashes are expected to be similar even at the strictest threshold.
# The threshold is passed as an options hash (`similarity: 1`), matching the
# other examples in this spec, rather than as a bare Integer.
it 'is able to tell similiar empty hash' do
  described_class.similar?({}, {}, similarity: 1).should be true
end

# Two empty arrays are expected to be similar even at the strictest threshold.
# The threshold is passed as an options hash (`similarity: 1`), matching the
# other examples in this spec, rather than as a bare Integer.
it 'is able to tell similiar empty array' do
  described_class.similar?([], [], similarity: 1).should be true
end

it 'is able to tell similiar hash with values within tolerance' do
a = { 'a' => 1.5, 'b' => 2.25, 'c' => 3, 'd' => 4, 'e' => 5 }
b = { 'a' => 1.503, 'b' => 2.22, 'c' => 3, 'e' => 5 }
Expand Down

0 comments on commit 2040cc9

Please sign in to comment.