Skip to content

Commit

Permalink
cmd/server: re-order SDN names where surname precedes "first name"
Browse files Browse the repository at this point in the history
'Nicolas Maduro' is listed as 'MADURO MOROS, Nicolas' in the OFAC
list, but search queries would come in as 'Nicolas Maduro'. We need to
re-order names so that search queries match them better.

This matches 'Nicolas Maduro' against his OFAC name at 94.4%.
  • Loading branch information
adamdecaf committed Mar 7, 2019
1 parent fa9fb88 commit e7050ec
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 4 deletions.
26 changes: 25 additions & 1 deletion cmd/server/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ package main
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"regexp"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -205,12 +207,34 @@ func precomputeSDNs(sdns []*ofac.SDN) []*SDN {
for i := range sdns {
out[i] = &SDN{
SDN: sdns[i],
name: precompute(sdns[i].SDNName),
name: precompute(reorderSDNName(sdns[i].SDNName, sdns[i].SDNType)),
}
}
return out
}

var (
	// surnamePrecedes matches a trailing ", Given" suffix: a comma, at most one
	// whitespace character, and a run of letters that reaches the end of the string.
	// \p{L} (any Unicode letter) is used rather than [a-zA-Z] so that given names
	// containing non-ASCII letters (e.g. 'José') are matched as well.
	surnamePrecedes = regexp.MustCompile(`(,\s?\p{L}*)$`)
)

// reorderSDNName takes an SDN name and, when it ends with a single given name placed
// after the surname ('Doe, John'), returns the name with the given name moved to the
// front ('John Doe') so that it precedes the surname.
//
// Only names whose type equals "individual" (compared case-insensitively) are reordered;
// any other type, and any name that does not end in ', <letters>', is returned unchanged.
// A comma followed by more than one word is therefore left alone.
//
// Example:
// SDN EntityID: 19147 has 'FELIX B. MADURO S.A.' (returned unchanged)
// SDN EntityID: 22790 has 'MADURO MOROS, Nicolas' (returned as 'Nicolas MADURO MOROS')
func reorderSDNName(name string, tpe string) string {
	if !strings.EqualFold(tpe, "individual") {
		return name // only reorder individual names
	}
	v := surnamePrecedes.FindString(name)
	if v == "" {
		return name // no match on 'Doe, John'
	}
	// v is the ', Given' suffix; drop its comma and strip it from the end of name
	// to obtain the two halves, then join them in the opposite order.
	given := strings.TrimPrefix(v, ",")
	surname := strings.TrimSuffix(name, v)
	// TrimSpace removes the space that followed the comma (now leading).
	return strings.TrimSpace(fmt.Sprintf("%s %s", given, surname))
}

// Address is ofac.Address wrapped with precomputed search metadata
type Address struct {
Address *ofac.Address
Expand Down
23 changes: 20 additions & 3 deletions cmd/server/search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,24 @@ func TestSearch_precompute(t *testing.T) {
}
}

// TestSearch_reorderSDNName runs reorderSDNName over a table of names, always with the
// "individual" type, and reports every case whose result differs from the expected string.
func TestSearch_reorderSDNName(t *testing.T) {
	type reorderCase struct {
		input, expected string
	}
	table := []reorderCase{
		// control
		{"Jane Doe", "Jane Doe"},
		// made up name to make sure we don't clobber ,'s in the middle of a name
		{"Jane, Doe Other", "Jane, Doe Other"},
		// keep .'s in a name
		{"FELIX B. MADURO S.A.", "FELIX B. MADURO S.A."},
		{"MADURO MOROS, Nicolas", "Nicolas MADURO MOROS"},
		{"IBRAHIM, Sadr", "Sadr IBRAHIM"},
	}
	for _, tc := range table {
		got := reorderSDNName(tc.input, "individual")
		if got == tc.expected {
			continue
		}
		t.Errorf("reorderSDNName(%q)=%q expected %q", tc.input, got, tc.expected)
	}
}

// TestSearch_liveData will download the real OFAC data and run searches against the corpus.
// This test is designed to tweak match percents and results.
func TestSearch_liveData(t *testing.T) {
Expand All @@ -146,15 +164,14 @@ func TestSearch_liveData(t *testing.T) {
name string
match float64 // top match %
}{
{"Jane Doe", 0.765}, // matches 'jan lahore'
{"Nicolas MADURO", 0.944},
}
for i := range cases {
sdns := searcher.TopSDNs(1, cases[i].name)
if len(sdns) == 0 {
t.Errorf("name=%q got no results", cases[i].name)
}
fmt.Printf("%q matches %q at %.2f\n", cases[i].name, sdns[0].name, sdns[0].match)
eql(t, "", sdns[0].match, cases[i].match)
eql(t, fmt.Sprintf("%q (SDN=%s) matches %q ", cases[i].name, sdns[0].SDN.EntityID, sdns[0].name), sdns[0].match, cases[i].match)
}
}

Expand Down

0 comments on commit e7050ec

Please sign in to comment.