This repository has been archived by the owner on Apr 7, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
/
find_overlapping_regions_meta.rb
105 lines (90 loc) · 3.26 KB
/
find_overlapping_regions_meta.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# Script to find overlapping regions and gaps between regions using .META.
#
# It outputs the region names in the format "<tableName>,<startKey>,<encodedName>" suitable for use
# with: org.apache.hadoop.hbase.util.Merge
#
# ${HBASE_HOME}/bin/hbase org.jruby.Main find_overlapping_regions_meta.rb
#
include Java
import java.lang.Integer
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HConstants
import org.apache.hadoop.hbase.HRegionInfo
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.client.Delete
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.HTableDescriptor
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.util.FSUtils
import org.apache.hadoop.hbase.util.Writables
import org.apache.hadoop.fs.Path
import org.apache.hadoop.fs.FileSystem
import org.apache.commons.logging.LogFactory
import org.apache.hadoop.io.WritableComparator
# Base name of this script (without the .rb extension); shown in the usage
# banner.
NAME = "find_overlapping_regions_meta"

# Writes a one-line usage banner to standard output and then terminates the
# process right away through Kernel#exit!.
def usage
  puts "Usage: #{NAME}.rb"
  exit!
end
# Build the HBase configuration used for the rest of the run.
conf = HBaseConfiguration.new()
# Point Hadoop's default filesystem at hbase.rootdir. Without this override
# the filesystem would always be localhost, even though hbase.rootdir might
# be pointing at an hdfs location.
conf.set("fs.default.name", conf.get(HConstants::HBASE_DIR))
fs = FileSystem.get(conf)
# Logger named after this script.
LOG = LogFactory.getLog(NAME)
# The script takes no command-line arguments; any argument prints the usage
# banner and exits.
usage unless ARGV.empty?
LOG.info("Finding overlapping regions and gaps in .META.")
require 'set'
# Open the catalog table and start a scan with a default (unrestricted) Scan.
meta_table_name = ".META."
metatable = HTable.new(conf, meta_table_name)
scanner = metatable.getScanner(Scan.new())
# Walk the .META. scanner row by row and compare every region with the region
# seen immediately before it. Two consecutive regions are only compared when
# they belong to the same table:
#   * start key sorts before the previous end key -> overlap
#   * start key sorts after the previous end key  -> gap
# Keys are compared as raw byte strings.
#
# State carried from one row to the next. previous_table_name stays nil until
# the first row has been read, so the first region is never compared against
# anything -- but its end key and name ARE recorded, so the second region is
# checked against real values rather than against empty placeholders.
previous_table_name = nil
previous_end_key = ""
previous_region_name = ""
num_overlaps = 0
num_gaps = 0
num_regions = 0
begin
  while (result = scanner.next())
    num_regions += 1
    bytes = result.getValue(HConstants::CATALOG_FAMILY, HConstants::REGIONINFO_QUALIFIER)
    hri = Writables.getHRegionInfo(bytes)
    table_name = hri.getTableDesc().getNameAsString()
    start_key = String.from_java_bytes hri.getStartKey()
    end_key = String.from_java_bytes hri.getEndKey()
    region_name = hri.getRegionNameAsString()
    if table_name == previous_table_name
      # NOTE(review): an empty end key is taken to mean "runs to the end of
      # the table" (HBase convention -- confirm against HRegionInfo docs), so
      # any further region of the same table necessarily overlaps it.
      if previous_end_key.empty? || start_key < previous_end_key
        puts "Overlap: #{previous_region_name} #{region_name}"
        puts "End keys - previous: #{previous_end_key}, current: #{end_key}"
        num_overlaps += 1
      elsif start_key > previous_end_key
        # Keys between the previous end key and this start key are not
        # covered by any region.
        puts "Gap: #{previous_region_name} #{region_name}"
        puts "End keys - previous: #{previous_end_key}, current: #{end_key}"
        num_gaps += 1
      end
    end
    # Remember this region unconditionally (including the very first row and
    # the first row of each new table) for the next iteration's comparison.
    previous_table_name = table_name
    previous_end_key = end_key
    previous_region_name = region_name
  end
ensure
  # Release the scanner even when a row fails to decode part-way through.
  scanner.close()
end
puts "Number of overlaps found: #{num_overlaps}"
puts "Number of gaps found: #{num_gaps}"
puts "Number of regions found in user tables: #{num_regions}"