This repository has been archived by the owner on Apr 7, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
/
find_overlapping_regions.rb
executable file
·96 lines (81 loc) · 2.78 KB
/
find_overlapping_regions.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
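# Script reports overlapping regions, and gaps between regions, of a table
# in a running hbase.
# The table name is taken from the last component of the path given as the
# first argument; passing an explicit table name is not supported yet.
#
# To see usage for this script, run:
#
# ${HBASE_HOME}/bin/hbase org.jruby.Main find_overlapping_regions.rb
#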
include Java
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HConstants
import org.apache.hadoop.hbase.HRegionInfo
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.client.Delete
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.HTableDescriptor
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.util.FSUtils
import org.apache.hadoop.hbase.util.Writables
import org.apache.hadoop.fs.Path
import org.apache.hadoop.fs.FileSystem
import org.apache.commons.logging.LogFactory
import org.apache.hadoop.io.WritableComparator
# Base name of this script; it appears in the usage text below.
NAME = "find_duplicate_regions"

# Writes the one-line usage message to standard output, then terminates the
# process right away with exit! (default exit status).
def usage
  message = format('Usage: %s.rb TABLE_NAME', NAME)
  puts message
  exit!
end
# Build the HBase configuration used by the rest of the script.
c = HBaseConfiguration.new
# Point the default hadoop filesystem at hbase.rootdir. Without this the
# filesystem would always be localhost, even when hbase.rootdir points at
# an hdfs location.
hbase_rootdir = c.get(HConstants::HBASE_DIR)
c.set("fs.default.name", hbase_rootdir)
fs = FileSystem.get(c)
# Logger named after this script.
LOG = LogFactory.getLog(NAME)
# Check arguments: one or two are accepted, anything else prints the usage
# text (usage terminates the process).
usage if ARGV.size < 1 || ARGV.size > 2

# Get cmdline args. The first argument is a path; it is qualified against
# the filesystem obtained from the configuration.
srcdir = fs.makeQualified(Path.new(java.lang.String.new(ARGV[0])))

# Get table name.
tableName = nil
if ARGV.size > 1
  # An explicit table name as second argument is not implemented.
  # `raise IOError, "..."` is the correct form: `IOError("...")` would be a
  # call to a method named IOError, which does not exist, and would surface
  # as NoMethodError instead of the intended IOError.
  raise IOError, "Not supported yet"
else
  # If none provided use dirname. This must be a plain `else`: a bare
  # `elsif` would take the assignment below as its condition.
  tableName = srcdir.getName()
end

# Validate the table name. The return value is not inspected here;
# presumably the call raises on an illegal name -- TODO confirm.
HTableDescriptor.isLegalTableName(tableName.to_java_bytes)
# Announce which table is about to be inspected. This script only reads the
# table's region boundaries; nothing is removed from .META. here.
# (The table directory path that used to be computed at this point was never
# used by anything, so it is no longer built.)
LOG.info("Finding regions of " + tableName + " in .META.")
require 'set'

# Ask the table for the start and end keys of its regions. The returned pair
# holds the start keys first and the end keys second; each key is converted
# from Java bytes to a Ruby string.
wanted_table = HTable.new(c, tableName)
keys = wanted_table.getStartEndKeys
start_keys = keys.first.map { |raw| String.from_java_bytes(raw) }
end_keys = keys.second.map { |raw| String.from_java_bytes(raw) }

# Walk the regions in the order returned, comparing each region's start key
# with the end key of the region just before it. The first region is
# compared against the empty key.
prev_end_key = ""
prev_region = ""
start_keys.each_with_index do |region_start, idx|
  region_end = end_keys[idx]
  region_name = tableName + "," + region_start + "," + region_end
  order = region_start <=> prev_end_key
  if order < 0
    # Starts before the previous region ended: the two regions overlap.
    puts "These overlap: " + prev_region + ":" + region_name
  elsif order > 0
    # Starts after the previous region ended: the keys in between belong to
    # no region.
    puts "These have a gap between them: " + prev_region + ":" + region_name
  end
  prev_end_key = region_end
  prev_region = region_name
end