Skip to content

Commit

Permalink
Add authors summaries #1043 #1107
Browse files Browse the repository at this point in the history
Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
  • Loading branch information
pombredanne committed Jun 22, 2018
1 parent 78137a8 commit 1e75cec
Show file tree
Hide file tree
Showing 59 changed files with 221 additions and 10 deletions.
35 changes: 28 additions & 7 deletions src/summarycode/plugin_copyright_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,15 @@ class CopyrightSummary(PostScanPlugin):
attributes = OrderedDict([
('copyrights_summary', attr.ib(default=attr.Factory(list))),
('holders_summary', attr.ib(default=attr.Factory(list))),
('authors_summary', attr.ib(default=attr.Factory(list))),
])

sort_order = 12

options = [
CommandLineOption(('--copyrights-summary',),
is_flag=True, default=False,
help='Summarize copyrights and holders at the file and '
help='Summarize copyrights, holders and authors at the file and '
'directory level.',
help_group=POST_SCAN_GROUP)
]
Expand All @@ -107,6 +108,10 @@ def process_codebase(self, codebase, copyrights_summary, **kwargs):
{"value": "nexB Inc. and others.", "count": 13},
{"value": "MyCo Inc. and others.", "count": 13}
],
"authors_summary": [
{"value": "nexB Inc. and others.", "count": 13},
{"value": "MyCo Inc. and others.", "count": 13}
],
"""

def _collect_existing_summary_text_objects(_summaries):
Expand All @@ -119,16 +124,17 @@ def _collect_existing_summary_text_objects(_summaries):

for resource in codebase.walk(topdown=False):
if not (hasattr(resource, 'copyrights')
and hasattr(resource, 'holders')):
and hasattr(resource, 'holders')
and hasattr(resource, 'authors')):
continue
copyrights_summary = []
holders_summary = []
authors_summary = []
try:
# 1. Collect statements from this file/resource if any.
# Collect values from this file/resource if any.
copyrights_summary = [entry.get('value') for entry in resource.copyrights]

# 2. Collect holders from this file/resource if any.
holders_summary = [entry.get('value', []) for entry in resource.holders]
authors_summary = [entry.get('value') for entry in resource.authors]

if TRACE_DEEP:
logger_debug('process_codebase:1:from self:copyrights_summary:')
Expand All @@ -139,12 +145,19 @@ def _collect_existing_summary_text_objects(_summaries):
for s in holders_summary:
logger_debug(' ', s)

# 3. Collect direct children pre-summarized Texts
logger_debug('process_codebase:1:from self:authors_summary:')
for s in authors_summary:
logger_debug(' ', s)


# Collect direct children pre-summarized Texts
for child in resource.children(codebase):
copyrights_summary.extend(
_collect_existing_summary_text_objects(child.copyrights_summary))
holders_summary.extend(
_collect_existing_summary_text_objects(child.holders_summary))
authors_summary.extend(
_collect_existing_summary_text_objects(child.authors_summary))

if TRACE_DEEP:
logger_debug('process_codebase:2:self+children:copyrights_summary:')
Expand All @@ -155,17 +168,25 @@ def _collect_existing_summary_text_objects(_summaries):
for s in holders_summary:
logger_debug(' ', s)

logger_debug('process_codebase:2:self+children:authors_summary:')
for s in authors_summary:
logger_debug(' ', s)

# 3. summarize proper and save: expansion, cleaning and deduplication
summarized_copyright = summarize_copyrights(copyrights_summary, ignore_years=True)
summarized_holder = summarize_holders(holders_summary, expand=False)
summarized_authors = summarize_holders(authors_summary, expand=False)
resource.copyrights_summary = summarized_copyright
resource.holders_summary = summarized_holder
resource.authors_summary = summarized_authors
codebase.save_resource(resource)

except Exception as _e:
msg = 'Failed to create copyrights_summary or holders_summary for resource:\n{}\n'.format(repr(resource))
msg = 'Failed to create copyrights, authors or holders summary '
'for resource:\n{}\n'.format(repr(resource))
msg += 'with copyrights_summary:{}\n'.format(repr(copyrights_summary))
msg += 'with holders_summary:{}\n'.format(repr(holders_summary))
msg += 'with authors_summary:{}\n'.format(repr(authors_summary))
import traceback
msg += traceback.format_exc()
raise Exception(msg)
Expand Down
10 changes: 9 additions & 1 deletion tests/cluecode/cluecode_assert_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ class CopyrightTest(object):

holders_summary = attr.ib(default=attr.Factory(list))
copyrights_summary = attr.ib(default=attr.Factory(list))
authors_summary = attr.ib(default=attr.Factory(list))

expected_failures = attr.ib(default=attr.Factory(list))
notes = attr.ib(default=None)
Expand All @@ -99,10 +100,12 @@ def __attrs_post_init__(self, *args, **kwargs):
for holders_sum in self.holders_summary:
holders_sum['count'] = int(holders_sum['count'])

# fix counts to be ints: sane yaml loads everything as string
for copyrs_sum in self.copyrights_summary:
copyrs_sum['count'] = int(copyrs_sum['count'])

for auths_sum in self.authors_summary:
auths_sum['count'] = int(auths_sum['count'])

def to_dict(self):
"""
Serialize self to an ordered mapping.
Expand Down Expand Up @@ -236,12 +239,17 @@ def closure_test_function(*args, **kwargs):
if 'copyrights_summary' in test.what:
copyrights_summary = summarize_copyrights(copyrights)

authors_summary = []
if 'authors_summary' in test.what:
authors_summary = summarize_holders(authors)

results = dict(
copyrights=copyrights,
authors=authors,
holders=holders,
holders_summary=holders_summary,
copyrights_summary=copyrights_summary,
authors_summary=authors_summary,
)

if regen:
Expand Down
4 changes: 4 additions & 0 deletions tests/cluecode/data/authors/author-config.rpath.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
what:
- authors
- authors_summary
authors:
- Gordon Matzigkeit <gord@gnu.ai.mit.edu>
authors_summary:
- value: Gordon Matzigkeit <gord@gnu.ai.mit.edu>
count: 1
4 changes: 4 additions & 0 deletions tests/cluecode/data/authors/author_addr_c-addr_c.c.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
what:
- authors
- authors_summary
authors:
- John Doe
authors_summary:
- value: John Doe
count: 1
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
what:
- authors
- authors_summary
authors:
- Avinash Kak (kak@purdue.edu)
- Avinash Kak (kak@purdue.edu)
authors_summary:
- value: Avinash Kak (kak@purdue.edu)
count: 2
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
what:
- authors
- authors_summary
authors:
- Avinash Kak (kak@purdue.edu)
- Avinash Kak (kak@purdue.edu)
authors_summary:
- value: Avinash Kak (kak@purdue.edu)
count: 2
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
what:
- authors
- authors_summary
authors:
- the University of California, Berkeley and its contributors.
authors_summary:
- value: the University of California, Berkeley and its contributors.
count: 1
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
what:
- authors
- authors_summary
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
what:
- authors
- authors_summary
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
what:
- authors
- authors_summary
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
what:
- authors
- authors_summary
authors:
- Gordon Matzigkeit
authors_summary:
- value: Gordon Matzigkeit
count: 1
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
what:
- authors
- authors_summary
authors:
- Mohit Agarwal.
- Mitsubishi and NTT.
authors_summary:
- value: Mitsubishi and NTT.
count: 1
- value: Mohit Agarwal.
count: 1
1 change: 1 addition & 0 deletions tests/cluecode/data/authors/author_expat-expat_h.h.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
what:
- authors
- authors_summary
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
what:
- authors
- authors_summary
authors:
- Gary O'Neall
authors_summary:
- value: Gary O'Neall
count: 1
6 changes: 6 additions & 0 deletions tests/cluecode/data/authors/author_gthomas_c-c.c.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
what:
- authors
- authors_summary
authors:
- gthomas, sorin@netappi.com
- gthomas, sorin@netappi.com, andrew.lunn@ascom.ch
authors_summary:
- value: gthomas, sorin@netappi.com, andrew.lunn@ascom.ch
count: 1
- value: gthomas, sorin@netappi.com
count: 1
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
what:
- authors
- authors_summary
authors:
- Scott Violet
authors_summary:
- value: Scott Violet
count: 1
4 changes: 4 additions & 0 deletions tests/cluecode/data/authors/author_in_java_tag-java.java.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
what:
- authors
- authors_summary
authors:
- Apple Banana Car
authors_summary:
- value: Apple Banana Car
count: 1
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
what:
- authors
- authors_summary
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
what:
- authors
- authors_summary
4 changes: 4 additions & 0 deletions tests/cluecode/data/authors/author_iproute2.c.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
what:
- authors
- authors_summary
authors:
- Patrick McHardy <kaber@trash.net>
authors_summary:
- value: Patrick McHardy <kaber@trash.net>
count: 1
5 changes: 3 additions & 2 deletions tests/cluecode/data/authors/author_nathan-KEYS.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
what:
- authors
- authors_summary
notes: |-
name +email is not enough to create an author
hence this is not detected correctly
Nathan Mittler <nathan.mittler@gmail.com>
hence this is not detected correctly
Nathan Mittler <nathan.mittler@gmail.com>
1 change: 1 addition & 0 deletions tests/cluecode/data/authors/author_no_author-c.c.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
what:
- authors
- authors_summary
1 change: 1 addition & 0 deletions tests/cluecode/data/authors/author_none-wrong.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
what:
- authors
- authors_summary
1 change: 1 addition & 0 deletions tests/cluecode/data/authors/author_none_c-c.c.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
what:
- authors
- authors_summary
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
what:
- authors
- authors_summary
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
what:
- authors
- authors_summary
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
what:
- authors
- authors_summary
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
what:
- authors
- authors_summary
1 change: 1 addition & 0 deletions tests/cluecode/data/authors/author_russ_c-c.c.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
what:
- authors
- authors_summary
notes: these are detected as copyrights, not authors Russ Dill <Russ.Dill@asu.edu> Vladimir
Oleynik <dzo@simtreas.ru>
1 change: 1 addition & 0 deletions tests/cluecode/data/authors/author_sample-c.c.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
what:
- authors
- authors_summary
1 change: 1 addition & 0 deletions tests/cluecode/data/authors/author_samplepy-py.py.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
what:
- authors
- authors_summary
1 change: 1 addition & 0 deletions tests/cluecode/data/authors/author_snippet.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
what:
- authors
- authors_summary
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
what:
- authors
- authors_summary
authors:
- faith@dict.org
authors_summary:
- value: faith@dict.org
count: 1
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
what:
- authors
- authors_summary
authors:
- Philip Milne
- Scott Violet
authors_summary:
- value: Philip Milne
count: 1
- value: Scott Violet
count: 1
6 changes: 6 additions & 0 deletions tests/cluecode/data/authors/author_uc-LICENSE.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
what:
- authors
- authors_summary
authors:
- the University of California, Berkeley and its contributors.
- UC Berkeley and its contributors.
- the University of California, Berkeley and its contributors.
authors_summary:
- value: the University of California, Berkeley and its contributors.
count: 2
- value: UC Berkeley and its contributors.
count: 1
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
what:
- authors
- authors_summary
authors:
- Erik Schoenfelder (schoenfr@ibr.cs.tu-bs.de)
- Jennifer Bray of Origin (jbray@origin-at.co.uk)
- Simon Leinen (simon@switch.ch)
authors_summary:
- value: Erik Schoenfelder (schoenfr@ibr.cs.tu-bs.de)
count: 1
- value: Jennifer Bray of Origin (jbray@origin-at.co.uk)
count: 1
- value: Simon Leinen (simon@switch.ch)
count: 1
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
what:
- authors
- authors_summary
4 changes: 4 additions & 0 deletions tests/cluecode/data/authors/configure.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
what:
- authors
- authors_summary
authors:
- Gordon Matzigkeit <gord@gnu.ai.mit.edu>
authors_summary:
- value: Gordon Matzigkeit <gord@gnu.ai.mit.edu>
count: 1
1 change: 1 addition & 0 deletions tests/cluecode/data/authors/cvs_keywords.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
what:
- authors
- authors_summary
Loading

0 comments on commit 1e75cec

Please sign in to comment.