diff --git a/Test/org/spdx/compare/TestCompareTemplateOutputHandler.java b/Test/org/spdx/compare/TestCompareTemplateOutputHandler.java index 2bce0bc9..9ed4501f 100644 --- a/Test/org/spdx/compare/TestCompareTemplateOutputHandler.java +++ b/Test/org/spdx/compare/TestCompareTemplateOutputHandler.java @@ -63,9 +63,10 @@ public void tearDown() throws Exception { /** * Test method for {@link org.spdx.compare.CompareTemplateOutputHandler#CompareTemplateOutputHandler(java.lang.String)}. + * @throws Exception */ @Test - public void testCompareTemplateOutputHandler() { + public void testCompareTemplateOutputHandler() throws Exception { CompareTemplateOutputHandler ctoh = new CompareTemplateOutputHandler("test"); assertTrue(ctoh.matches()); } @@ -75,7 +76,7 @@ public void testCompareTemplateOutputHandler() { * @throws LicenseTemplateRuleException */ @Test - public void testOptionalText() throws LicenseTemplateRuleException { + public void testOptionalText() throws Exception { String l1 = "Line 1\n"; String l2 = "Line 2\n"; String l3 = "Line 3\n"; @@ -127,7 +128,7 @@ public void testOptionalText() throws LicenseTemplateRuleException { * Test method for {@link org.spdx.compare.CompareTemplateOutputHandler#textEquivalent(java.lang.String)}. */ @Test - public void testTextEquivalent() { + public void testTextEquivalent() throws Exception { String l1 = "Line 1 with // skippable ## /** stuff\n"; String l1S = "Line 1 with skippable stuff\n"; String l2 = "## Line 2 with replaceable analogue cancelled stuff\n"; @@ -170,7 +171,7 @@ public void testTextEquivalent() { * Test method for {@link org.spdx.compare.CompareTemplateOutputHandler#normalText(java.lang.String)}. 
*/ @Test - public void testNormalText() { + public void testNormalText() throws Exception { String line1 = "this is line one\n"; String line2 = "this line 2 is another line\n"; String line3 = "yet another third line\n"; @@ -213,7 +214,7 @@ public void testNormalText() { * @throws LicenseTemplateRuleException */ @Test - public void testVariableRule() throws LicenseTemplateRuleException { + public void testVariableRule() throws Exception { String line1 = "this is line one\n"; String line2 = "this line 2 is another line\n"; String line2Match = "this\\sline\\s.+another\\sline"; diff --git a/src/org/spdx/compare/CompareTemplateOutputHandler.java b/src/org/spdx/compare/CompareTemplateOutputHandler.java index 9d3c388e..fa1dadab 100644 --- a/src/org/spdx/compare/CompareTemplateOutputHandler.java +++ b/src/org/spdx/compare/CompareTemplateOutputHandler.java @@ -16,6 +16,14 @@ */ package org.spdx.compare; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -31,21 +39,131 @@ public class CompareTemplateOutputHandler implements ILicenseTemplateOutputHandler { + class LineColumn { + private int line; + private int column; + private int len; + + public LineColumn(int line, int column,int len) { + this.line = line; + this.column = column; + this.len = len; /* was never stored, so getLen() always returned 0 */ + } + + public int getLine() { + return line; + } + + public void setLine(int line) { + this.line = line; + } + + public int getColumn() { + return column; + } + + public void setColumn(int column) { + this.column = column; + } + + public int getLen() { + return len; + } + + public void setLen(int len) { + this.len = len; + } + } + + class DifferenceDescription { + private boolean differenceFound; + private String differenceMessage; + private List differences; + + public DifferenceDescription(boolean
differenceFound, String differenceMessage, List differences) { + this.differenceFound = differenceFound; + this.differenceMessage = differenceMessage; + this.differences = differences; + } + + public boolean isDifferenceFound() { + return differenceFound; + } + + public void setDifferenceFound(boolean differenceFound) { + this.differenceFound = differenceFound; + } + + public String getDifferenceMessage() { + return differenceMessage; + } + + public void setDifferenceMessage(String differenceMessage) { + this.differenceMessage = differenceMessage; + } + + public List getDifferences() { + return differences; + } + + public void setDifferences(List differences) { + this.differences = differences; + } + + } + String compareText = ""; boolean differenceFound = false; String[] compareTokens = new String[0]; int compareTokenCounter = 0; String nextCompareToken = null; String differenceExplanation = "No difference found"; + List differences = new ArrayList(); StringBuilder optionalText = new StringBuilder(); + Map tokenToLocation = new HashMap(); /** * @param compareText Text to compare the parsed SPDX license template to + * @throws IOException This is not to be expected since we are using StringReaders */ - public CompareTemplateOutputHandler(String compareText) { + public CompareTemplateOutputHandler(String compareText) throws IOException { this.compareText = compareText; - this.compareTokens = this.compareText.split(LicenseCompareHelper.TOKEN_DELIM); + List tokens = new ArrayList(); + BufferedReader reader = null; + try { + reader = new BufferedReader(new StringReader(compareText)); + int currentLine = 1; + int currentToken = 0; + String line = reader.readLine(); + Pattern delimPattern = Pattern.compile(LicenseCompareHelper.TOKEN_DELIM); + while (line != null) { + Matcher lineMatcher = delimPattern.matcher(line); + int lastColMatched = 0; + while (lineMatcher.find()) { + String token = line.substring(lastColMatched, lineMatcher.start()); + if (token.length() > 0) { + 
tokens.add(token); + tokenToLocation.put(currentToken, new LineColumn(currentLine, lastColMatched, token.length())); + currentToken++; + } + lastColMatched = lineMatcher.end(); + } + if (lastColMatched < line.length()) { + String token = line.substring(lastColMatched, line.length()); + tokens.add(token); + tokenToLocation.put(currentToken, new LineColumn(currentLine, lastColMatched, token.length())); + currentToken++; + } + currentLine++; + line = reader.readLine(); + } + } finally { + if (reader != null) { + reader.close(); + } + } + this.compareTokens = tokens.toArray(new String[tokens.size()]); compareTokenCounter = 0; nextCompareToken = LicenseCompareHelper.getTokenAt(compareTokens, compareTokenCounter++); } @@ -112,14 +229,23 @@ public void normalText(String text) { if (!textEquivalent(text)) { this.differenceFound = true; if (this.nextCompareToken == null) { - this.differenceExplanation = "End of compare text encountered before the end of the license template"; + LineColumn lastLineColumn = tokenToLocation.get(this.compareTokens.length-1); + // create a zero length location at the end of the file + addDifference("End of compare text encountered before the end of the license template", + new LineColumn(lastLineColumn.getLine(), lastLineColumn.getColumn()+lastLineColumn.getLen(),0)); } else { - this.differenceExplanation = "Difference found starting at token #"+ - String.valueOf(this.compareTokenCounter)+"\""+ - this.nextCompareToken+"\"."; + addDifference("Difference found in normal text",tokenToLocation.get(this.compareTokenCounter)); } } } + + private void addDifference(String msg, LineColumn location) { + this.differenceExplanation = msg + " starting at line #"+ + String.valueOf(location.getLine())+ " column #" + + String.valueOf(location.getColumn())+"\""+ + this.nextCompareToken+"\"."; + this.differences.add(location); + } /* (non-Javadoc) * @see 
org.spdx.licenseTemplate.ILicenseTemplateOutputHandler#variableRule(org.spdx.licenseTemplate.LicenseTemplateRule) @@ -134,18 +260,14 @@ public void variableRule(LicenseTemplateRule rule) { Matcher matcher = matchPattern.matcher(remainingText); if (!matcher.find()) { this.differenceFound = true; - this.differenceExplanation = "Variable text rule "+rule.getName()+ - " did not match the compare text starting at token #"+ - String.valueOf(this.compareTokenCounter)+"\""+ - this.nextCompareToken+"\"."; + addDifference("Variable text rule "+rule.getName()+" did not match the compare text", + tokenToLocation.get(this.compareTokenCounter)); } else if (matcher.start() > 0) { this.differenceFound = true; - this.differenceExplanation = "Extra text \""+ + addDifference("Extra text \""+ remainingText.substring(0, matcher.start()) + - "\" found before the variable text rule "+rule.getName()+ - " starting at token #"+ - String.valueOf(this.compareTokenCounter)+"\""+ - this.nextCompareToken+"\"."; + "\" found before the variable text rule "+rule.getName(), + tokenToLocation.get(this.compareTokenCounter)); } else { // advance the token counter String textAfterMatch = remainingText.substring(matcher.end()).trim(); @@ -164,7 +286,8 @@ public void variableRule(LicenseTemplateRule rule) { if (!this.nextCompareToken.equals(tokensAfterMatch[0]) && (tokensAfterMatch.length > 1 && !this.nextCompareToken.equals(tokensAfterMatch[1]))) { this.differenceFound = true; - this.differenceExplanation = "Missmatched text found after end of variable rule" + rule.getName(); + addDifference("Missmatched text found after end of variable rule" + rule.getName(), + tokenToLocation.get(this.compareTokenCounter)); /* map is keyed by token index, not token text */ } } } @@ -212,11 +335,14 @@ public void endOptional(LicenseTemplateRule rule) { String saveNextComparisonToken = nextCompareToken; int saveCompareTokenCounter = compareTokenCounter; String saveDifferenceExplanation = this.differenceExplanation; + List saveDifferences = new ArrayList(); +
saveDifferences.addAll(this.differences); /* snapshot; Collections.copy throws when the destination is shorter than the source */ if (!textEquivalent(this.optionalText.toString())) { // reset counters this.nextCompareToken = saveNextComparisonToken; this.compareTokenCounter = saveCompareTokenCounter; this.differenceExplanation = saveDifferenceExplanation; + this.differences = saveDifferences; /* restore the snapshot taken before the optional text was tried */ } } @@ -226,5 +352,12 @@ public void endOptional(LicenseTemplateRule rule) { public boolean matches() { return !differenceFound; } + + /** + * @return details on the differences found + */ + public DifferenceDescription getDifferences() { + return new DifferenceDescription(differenceFound, this.differenceExplanation, this.differences); + } } diff --git a/src/org/spdx/compare/LicenseCompareHelper.java b/src/org/spdx/compare/LicenseCompareHelper.java index 9cb0a213..ff4a7e67 100644 --- a/src/org/spdx/compare/LicenseCompareHelper.java +++ b/src/org/spdx/compare/LicenseCompareHelper.java @@ -17,11 +17,13 @@ */ package org.spdx.compare; +import java.io.IOException; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.spdx.compare.CompareTemplateOutputHandler.DifferenceDescription; import org.spdx.licenseTemplate.LicenseTemplateRuleException; import org.spdx.licenseTemplate.SpdxLicenseTemplateHelper; import org.spdx.rdfparser.InvalidSPDXAnalysisException; @@ -50,53 +52,56 @@ public class LicenseCompareHelper { .add("//").add("/*").add("*/").add("/**").add("#").add("##") .add("*").add("\"\"\"").add("=begin").add("=end").build(); - protected static final Map EQUIV_TOKENS = Maps.newHashMap(); + protected static final Map NORMALIZE_TOKENS = Maps.newHashMap(); static { //TODO: These should be moved to a property file - EQUIV_TOKENS.put("acknowledgement","acknowledgment"); EQUIV_TOKENS.put("acknowledgment","acknowledgement"); - EQUIV_TOKENS.put("analog","analogue"); EQUIV_TOKENS.put("analogue","analog"); - EQUIV_TOKENS.put("analyze","analyse");
EQUIV_TOKENS.put("analyse","analyze"); - EQUIV_TOKENS.put("artifact","artefact"); EQUIV_TOKENS.put("artefact","artifact"); - EQUIV_TOKENS.put("authorization","authorisation"); EQUIV_TOKENS.put("authorisation","authorization"); - EQUIV_TOKENS.put("authorized","authorised"); EQUIV_TOKENS.put("authorised","authorized"); - EQUIV_TOKENS.put("caliber","calibre"); EQUIV_TOKENS.put("calibre","caliber"); - EQUIV_TOKENS.put("canceled","cancelled"); EQUIV_TOKENS.put("cancelled","canceled"); - EQUIV_TOKENS.put("apitalizations","apitalisations"); EQUIV_TOKENS.put("apitalisations","apitalizations"); - EQUIV_TOKENS.put("catalog","catalogue"); EQUIV_TOKENS.put("catalogue","catalog"); - EQUIV_TOKENS.put("categorize","categorise"); EQUIV_TOKENS.put("categorise","categorize"); - EQUIV_TOKENS.put("center","centre"); EQUIV_TOKENS.put("centre","center"); - EQUIV_TOKENS.put("emphasized","emphasised"); EQUIV_TOKENS.put("emphasised","emphasized"); - EQUIV_TOKENS.put("favor","favour"); EQUIV_TOKENS.put("favour","favor"); - EQUIV_TOKENS.put("favorite","favourite"); EQUIV_TOKENS.put("favourite","favorite"); - EQUIV_TOKENS.put("fulfill","fulfil"); EQUIV_TOKENS.put("fulfil","fulfill"); - EQUIV_TOKENS.put("fulfillment","fulfilment"); EQUIV_TOKENS.put("fulfilment","fulfillment"); - EQUIV_TOKENS.put("initialize","initialise"); EQUIV_TOKENS.put("initialise","initialize"); - EQUIV_TOKENS.put("judgement","judgment"); EQUIV_TOKENS.put("judgment","judgement"); - EQUIV_TOKENS.put("labeling","labelling"); EQUIV_TOKENS.put("labelling","labeling"); - EQUIV_TOKENS.put("labor","labour"); EQUIV_TOKENS.put("labour","labor"); - EQUIV_TOKENS.put("license","licence"); EQUIV_TOKENS.put("licence","license"); - EQUIV_TOKENS.put("maximize","maximise"); EQUIV_TOKENS.put("maximise","maximize"); - EQUIV_TOKENS.put("modeled","modelled"); EQUIV_TOKENS.put("modelled","modeled"); - EQUIV_TOKENS.put("modeling","modelling"); EQUIV_TOKENS.put("modelling","modeling"); - EQUIV_TOKENS.put("offense","offence"); 
EQUIV_TOKENS.put("offence","offense"); - EQUIV_TOKENS.put("optimize","optimise"); EQUIV_TOKENS.put("optimise","optimize"); - EQUIV_TOKENS.put("organization","organisation"); EQUIV_TOKENS.put("organisation","organization"); - EQUIV_TOKENS.put("organize","organise"); EQUIV_TOKENS.put("organise","organize"); - EQUIV_TOKENS.put("practice","practise"); EQUIV_TOKENS.put("practise","practice"); - EQUIV_TOKENS.put("program","programme"); EQUIV_TOKENS.put("programme","program"); - EQUIV_TOKENS.put("realize","realise"); EQUIV_TOKENS.put("realise","realize"); - EQUIV_TOKENS.put("recognize","recognise"); EQUIV_TOKENS.put("recognise","recognize"); - EQUIV_TOKENS.put("signaling","signalling"); EQUIV_TOKENS.put("signalling","signaling"); - EQUIV_TOKENS.put("utilization","utilisation"); EQUIV_TOKENS.put("utilisation","utilization"); - EQUIV_TOKENS.put("while","whilst"); EQUIV_TOKENS.put("whilst","while"); - EQUIV_TOKENS.put("wilfull","wilful"); EQUIV_TOKENS.put("wilful","wilfull"); - EQUIV_TOKENS.put("noncommercial","non-commercial"); EQUIV_TOKENS.put("non-commercial","noncommercial"); - EQUIV_TOKENS.put("copyright-holder", "copyright-owner"); EQUIV_TOKENS.put("copyright-owner", "copyright-holder"); - EQUIV_TOKENS.put("sub-license", "sublicense"); EQUIV_TOKENS.put("sublicense", "sub-license"); - EQUIV_TOKENS.put("noninfringement", "non-infringement"); EQUIV_TOKENS.put("non-infringement", "noninfringement"); - EQUIV_TOKENS.put("(c)", "©"); + NORMALIZE_TOKENS.put("acknowledgment","acknowledgement"); + NORMALIZE_TOKENS.put("analogue","analog"); + NORMALIZE_TOKENS.put("analyse","analyze"); + NORMALIZE_TOKENS.put("artefact","artifact"); + NORMALIZE_TOKENS.put("authorisation","authorization"); + NORMALIZE_TOKENS.put("authorised","authorized"); + NORMALIZE_TOKENS.put("calibre","caliber"); + NORMALIZE_TOKENS.put("cancelled","canceled"); + NORMALIZE_TOKENS.put("apitalisations","apitalizations"); + NORMALIZE_TOKENS.put("catalogue","catalog"); + 
NORMALIZE_TOKENS.put("categorise","categorize"); + NORMALIZE_TOKENS.put("centre","center"); + NORMALIZE_TOKENS.put("emphasised","emphasized"); + NORMALIZE_TOKENS.put("favour","favor"); + NORMALIZE_TOKENS.put("favourite","favorite"); + NORMALIZE_TOKENS.put("fulfil","fulfill"); + NORMALIZE_TOKENS.put("fulfilment","fulfillment"); + NORMALIZE_TOKENS.put("initialise","initialize"); + NORMALIZE_TOKENS.put("judgment","judgement"); + NORMALIZE_TOKENS.put("labelling","labeling"); + NORMALIZE_TOKENS.put("labour","labor"); + NORMALIZE_TOKENS.put("licence","license"); + NORMALIZE_TOKENS.put("maximise","maximize"); + NORMALIZE_TOKENS.put("modelled","modeled"); + NORMALIZE_TOKENS.put("modelling","modeling"); + NORMALIZE_TOKENS.put("offence","offense"); + NORMALIZE_TOKENS.put("optimise","optimize"); + NORMALIZE_TOKENS.put("organisation","organization"); + NORMALIZE_TOKENS.put("organise","organize"); + NORMALIZE_TOKENS.put("practise","practice"); + NORMALIZE_TOKENS.put("programme","program"); + NORMALIZE_TOKENS.put("realise","realize"); + NORMALIZE_TOKENS.put("recognise","recognize"); + NORMALIZE_TOKENS.put("signalling","signaling"); + NORMALIZE_TOKENS.put("utilisation","utilization"); + NORMALIZE_TOKENS.put("whilst","while"); + NORMALIZE_TOKENS.put("wilful","wilfull"); + NORMALIZE_TOKENS.put("non-commercial","noncommercial"); + NORMALIZE_TOKENS.put("copyright-owner", "copyright-holder"); + NORMALIZE_TOKENS.put("sublicense", "sub-license"); + NORMALIZE_TOKENS.put("non-infringement", "noninfringement"); + NORMALIZE_TOKENS.put("©", "(c)"); + NORMALIZE_TOKENS.put("copyright", "(c)"); } + + static final String DASHES_REGEX = "[\\u2012\\u2013\\u2014\\u2015]"; static final String PER_CENT_REGEX = "(?i)per\\scent"; static final Pattern PER_CENT_PATTERN = Pattern.compile(PER_CENT_REGEX, Pattern.CASE_INSENSITIVE); @@ -221,12 +226,16 @@ static boolean tokensEquivalent(String tokenA, String tokenB) { if (s1.equals(s2)) { return true; } else { - // check for equivalent tokens - if 
(EQUIV_TOKENS.get(s1) != null) { - return s2.equals(EQUIV_TOKENS.get(s1)); - } else { - return false; + // check for equivalent tokens by normalizing the tokens + String ns1 = NORMALIZE_TOKENS.get(s1); + if (ns1 == null) { + ns1 = s1; } + String ns2 = NORMALIZE_TOKENS.get(s2); + if (ns2 == null) { + ns2 = s2; + } + return ns1.equals(ns2); } } } @@ -329,18 +338,23 @@ private static boolean isLicenseSetsEqual(LicenseSet license1, LicenseSet licens * @return True if the license text is the same per the license matching guidelines * @throws SpdxCompareException */ - public static boolean isTextStandardLicense(SpdxListedLicense license, String compareText) throws SpdxCompareException { + public static DifferenceDescription isTextStandardLicense(SpdxListedLicense license, String compareText) throws SpdxCompareException { String licenseTemplate = license.getStandardLicenseTemplate(); if (licenseTemplate == null || licenseTemplate.trim().isEmpty()) { - return isLicenseTextEquivalent(license.getLicenseText(), compareText); + licenseTemplate = license.getLicenseText(); + } + CompareTemplateOutputHandler compareTemplateOutputHandler = null; + try { + compareTemplateOutputHandler = new CompareTemplateOutputHandler(compareText); + } catch (IOException e1) { + throw(new SpdxCompareException("IO Error reading the compare text: "+e1.getMessage(),e1)); } - CompareTemplateOutputHandler compareTemplateOutputHandler = new CompareTemplateOutputHandler(compareText); try { SpdxLicenseTemplateHelper.parseTemplate(licenseTemplate, compareTemplateOutputHandler); } catch (LicenseTemplateRuleException e) { throw(new SpdxCompareException("Invalid template rule found during compare: "+e.getMessage(),e)); } - return compareTemplateOutputHandler.matches(); + return compareTemplateOutputHandler.getDifferences(); } /** @@ -356,7 +370,7 @@ public static String[] matchingStandardLicenseIds(String licenseText) throws Inv List matchingIds = Lists.newArrayList(); for (String stdLicId : stdLicenseIds) { 
SpdxListedLicense license = LicenseInfoFactory.getListedLicenseById(stdLicId); - if (isTextStandardLicense(license, licenseText)) { + if (!isTextStandardLicense(license, licenseText).isDifferenceFound()) { matchingIds.add(license.getLicenseId()); } }