Skip to content

Commit

Permalink
Add various data dump utilities
Browse files Browse the repository at this point in the history
  • Loading branch information
ShaneCurcuru committed Feb 17, 2024
1 parent b8d26b6 commit cd92c05
Showing 1 changed file with 116 additions and 6 deletions.
122 changes: 116 additions & 6 deletions assets/ruby/foundation_reporter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ module FoundationReporter
require 'csv'
require 'pathname'
require '../propublica990/propublica990'
require 'optparse'

# Get a list of all foundation files
# @param dir pointing to _foundations
Expand All @@ -22,6 +23,18 @@ def get_foundations(dir)
return foundations
end

# Get a list of all yaml data files of a type
# @param dir pointing to _foundations, _sponsorships, etc.
# @return hash of org => yaml frontmatter hash
def get_yamldataset(dir)
datasets = {}
Pathname.glob(File.join(dir, "*.md")) do |file|
dataset = YAML.load_file(file)
datasets[dataset['identifier']] = dataset
end
return datasets
end

# Get a list of all available EINs (for US-based Nonprofit501c* only).
# @param dir pointing to _foundations
# @return array of strings of EINs
Expand All @@ -44,15 +57,112 @@ def get_eins(dir)
end
return eins
end

# Gather statistics on a single field in all foundations
def gather_field(dir, onefield)
orgs = {}
vals = Hash.new{ | h, k | h[k] = [] }
output = {'orgs' => orgs, 'vals' => vals}
Dir["#{dir}/*.md"].each do |f|
begin
org = YAML.load_file(f)
data = org.fetch(onefield, nil)
if data
identifier = File.basename(f, '.md')
orgs[identifier] = data
vals[data] << identifier
end
rescue StandardError => e
puts "ERROR #{e}"
next # Silently ignore errors
end
end
return output
end

# Report on field usage
def field_usage(dataset)
report = {}
report['orgs'] = []
report['fieldcount'] = Hash.new(0)
dataset.each do | org, hash |
report['orgs'] << org
hash.each do | k, v |
report['fieldcount'][k] += 1 if v
end
end
report['fieldcount'] = Hash[report['fieldcount'].sort_by { |k, v| -v }]
return report
end

# ## ### #### ##### ######
# Check commandline options
def parse_commandline
options = {}
OptionParser.new do |opts|
opts.on('-h', '--help') { puts "#{DESCRIPTION}\n#{opts}"; exit }
opts.on('-oOUTFILE', '--out OUTFILE', 'Output filename for operation') do |out|
options[:out] = out
end
opts.on('-iINFILE', '--in INFILE', 'Input filename for operation') do |infile|
options[:infile] = infile
end
opts.on('-fFIELDNAME', '--field FIELDNAME', 'Single field name to report out for all foundations.') do |onefield|
options[:onefield] = onefield
end
opts.on('-cTYPE', '--count TYPE', 'Count field usage of all data files of a type.') do |ctype|
options[:ctype] = ctype
end
opts.on('-rREPORT', '--report REPORT', 'Output default reports.') do |reports|
options[:reports] = reports
end
begin
opts.parse!
rescue OptionParser::ParseError => e
$stderr.puts e
$stderr.puts opts
exit 1
end
end
return options
end
end

# ### #### ##### ######
# Main method for command line use
if __FILE__ == $PROGRAM_NAME
foundation_dir = File.join(Dir.pwd, '_foundations')
report_dir = File.join(Dir.pwd, '_data/p990')
eins = FoundationReporter.get_eins(foundation_dir)
orgs = Propublica990.get_orgs(eins, report_dir)
report_csv = File.join(report_dir, "foundations_990_common.csv")
Propublica990.orgs2csv_common(orgs, report_csv)
options = FoundationReporter.parse_commandline
options[:outfile] ||= 'foundation_reporter.json'

dirs = {
'foundations' => File.join(Dir.pwd, '_foundations'),
'sponsorships' => File.join(Dir.pwd, '_sponsorships'),
'entities' => File.join(Dir.pwd, '_entities'),
'p990' => File.join(Dir.pwd, '_data/p990')
}

ctype = options.fetch(:ctype, nil)
if ctype
output = {}
dataset = FoundationReporter.get_yamldataset(dirs[ctype])
output = FoundationReporter.field_usage(dataset)
puts JSON.pretty_generate(output)
end

ctype ||= 'foundations'
onefield = options.fetch(:onefield, nil)
if onefield
output = {}
output = FoundationReporter.gather_field(dirs[ctype], onefield)
puts JSON.pretty_generate(output)
end

reports = options.fetch(:reports, nil)
if reports

eins = FoundationReporter.get_eins(foundation_dir)
orgs = Propublica990.get_orgs(eins, p990_dir)
report_csv = File.join(p990_dir, options[:outfile])
Propublica990.orgs2csv_common(orgs, report_csv)
end
end

0 comments on commit cd92c05

Please sign in to comment.