From 18a80a03cdf7af6f9ca4a9af52eab205592f8362 Mon Sep 17 00:00:00 2001 From: Gary O'Neall Date: Wed, 6 Dec 2023 13:43:25 -0800 Subject: [PATCH 1/4] Fixes for end of license matches Fixes #214 Fixes #213 Signed-off-by: Gary O'Neall --- TestFiles/BSD-2-Clause-nl.txt | 23 ++ TestFiles/BSD-2-Clause.template.txt | 12 +- TestFiles/BSD-3-Clause-newline.txt | 26 ++ TestFiles/BSD-3-Clause.template.txt | 12 +- TestFiles/EPL-2.0.template.txt | 111 +++++++ TestFiles/EPL-2.0.txt | 277 ++++++++++++++++++ TestFiles/MIT.template.txt | 10 + TestFiles/MIT2Spaces.txt | 21 ++ .../utility/compare/LicenseCompareHelper.java | 56 ++-- .../compare/LicenseCompareHelperTest.java | 70 ++++- 10 files changed, 581 insertions(+), 37 deletions(-) create mode 100644 TestFiles/BSD-2-Clause-nl.txt create mode 100644 TestFiles/BSD-3-Clause-newline.txt create mode 100644 TestFiles/EPL-2.0.template.txt create mode 100644 TestFiles/EPL-2.0.txt create mode 100644 TestFiles/MIT.template.txt create mode 100644 TestFiles/MIT2Spaces.txt diff --git a/TestFiles/BSD-2-Clause-nl.txt b/TestFiles/BSD-2-Clause-nl.txt new file mode 100644 index 000000000..ab8a1a0bd --- /dev/null +++ b/TestFiles/BSD-2-Clause-nl.txt @@ -0,0 +1,23 @@ +Copyright (c) 2015, Atlassian Pty Ltd +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/TestFiles/BSD-2-Clause.template.txt b/TestFiles/BSD-2-Clause.template.txt index 0d2fb0b96..72e0cce64 100644 --- a/TestFiles/BSD-2-Clause.template.txt +++ b/TestFiles/BSD-2-Clause.template.txt @@ -1,5 +1,9 @@ -Copyright (c) < ";match=".+">> All rights reserved. +< . ";match=".{0,5000}">> + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. -THIS SOFTWARE IS PROVIDED BY <> "AS IS" AND ANY <> OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file + + <> Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + + <> Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +THIS<> SOFTWARE<> IS PROVIDED BY <> "AS IS" AND ANY <> OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS<> SOFTWARE<>, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/TestFiles/BSD-3-Clause-newline.txt b/TestFiles/BSD-3-Clause-newline.txt new file mode 100644 index 000000000..5b70d4e9a --- /dev/null +++ b/TestFiles/BSD-3-Clause-newline.txt @@ -0,0 +1,26 @@ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + - Neither the name of the Eclipse Foundation, Inc. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/TestFiles/BSD-3-Clause.template.txt b/TestFiles/BSD-3-Clause.template.txt index 4e809db2a..2b3d7f8bb 100644 --- a/TestFiles/BSD-3-Clause.template.txt +++ b/TestFiles/BSD-3-Clause.template.txt @@ -1,11 +1,11 @@ -Copyright (c) < ";match=".+">> . All rights reserved. +< . ";match=".{0,5000}">> -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: +Redistribution and use in source and binary forms<>, with or without modification, <> permitted provided that the following conditions are met: - <> Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + <> Redistributions of <> must retain the <> copyright notice, this list of conditions and the following disclaimer. - <> Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + <> Redistributions in binary form must reproduce the <> copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - <> Neither the name of <> nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + <> <> be used to endorse or promote products derived from this <> without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY <> "AS IS" AND ANY <> OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file +THIS <> IS PROVIDED <> "AS IS" AND ANY <> OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS <> , EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/TestFiles/EPL-2.0.template.txt b/TestFiles/EPL-2.0.template.txt new file mode 100644 index 000000000..6f4ba8abc --- /dev/null +++ b/TestFiles/EPL-2.0.template.txt @@ -0,0 +1,111 @@ +<>Eclipse Public License - v 2.0 + +<> + +THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. + + <> DEFINITIONS + + "Contribution" means: + + <> in the case of the initial Contributor, the initial content Distributed under this Agreement, and + + <> in the case of each subsequent Contributor: + + <> changes to the Program, and + + <> additions to the Program; + + where such changes and/or additions to the Program originate from and are Distributed by that particular Contributor. A Contribution "originates" from a Contributor if it was added to the Program by such Contributor itself or anyone acting on such Contributor's behalf. Contributions do not include changes or additions to the Program that are not Modified Works. + + "Contributor" means any person or entity that Distributes the Program. + + "Licensed Patents" mean patent claims licensable by a Contributor which are necessarily infringed by the use or sale of its Contribution alone or when combined with the Program. + + "Program" means the Contributions Distributed in accordance with this Agreement. + + "Recipient" means anyone who receives the Program under this Agreement or any Secondary License (as applicable), including Contributors. + + "Derivative Works" shall mean any work, whether in Source Code or other form, that is based on (or derived from) the Program and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. + + "Modified Works" shall mean any work in Source Code or other form that results from an addition to, deletion from, or modification of the contents of the Program, including, for purposes of clarity any new file in Source Code form that contains any contents of the Program. Modified Works shall not include works that contain only declarations, interfaces, types, classes, structures, or files of the Program solely in each case in order to link to, bind by name, or subclass the Program or Modified Works thereof. + + "Distribute" means the acts of a) distributing or b) making available in any manner that enables the transfer of a copy. + + "Source Code" means the form of a Program preferred for making modifications, including but not limited to software source code, documentation source, and configuration files. + + "Secondary License" means either the GNU General Public License, Version 2.0, or any later versions of that license, including any exceptions or additional permissions as identified by the initial Contributor. + + <> GRANT OF RIGHTS + + <> Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, Distribute and sublicense the Contribution of such Contributor, if any, and such Derivative Works. + + <> Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed Patents to make, use, sell, offer to sell, import and otherwise transfer the Contribution of such Contributor, if any, in Source Code or other form. This patent license shall apply to the combination of the Contribution and the Program if, at the time the Contribution is added by the Contributor, such addition of the Contribution causes such combination to be covered by the Licensed Patents. The patent license shall not apply to any other combinations which include the Contribution. No hardware per se is licensed hereunder. + + <> Recipient understands that although each Contributor grants the licenses to its Contributions set forth herein, no assurances are provided by any Contributor that the Program does not infringe the patent or other intellectual property rights of any other entity. Each Contributor disclaims any liability to Recipient for claims brought by any other entity based on infringement of intellectual property rights or otherwise. As a condition to exercising the rights and licenses granted hereunder, each Recipient hereby assumes sole responsibility to secure any other intellectual property rights needed, if any. For example, if a third party patent license is required to allow Recipient to Distribute the Program, it is Recipient's responsibility to acquire that license before distributing the Program. + + <> Each Contributor represents that to its knowledge it has sufficient copyright rights in its Contribution, if any, to grant the copyright license set forth in this Agreement. + + <> Notwithstanding the terms of any Secondary License, no Contributor makes additional grants to any Recipient (other than those set forth in this Agreement) as a result of such Recipient's receipt of the Program under the terms of a Secondary License (if permitted under the terms of Section 3). + + <> REQUIREMENTS + + <> If a Contributor Distributes the Program in any form, then: + + <> the Program must also be made available as Source Code, in accordance with section 3.2, and the Contributor must accompany the Program with a statement that the Source Code for the Program is available under this Agreement, and informs Recipients how to obtain it in a reasonable manner on or through a medium customarily used for software exchange; and + + <> the Contributor may Distribute the Program under a license different than this Agreement, provided that such license: + + <> effectively disclaims on behalf of all other Contributors all warranties and conditions, express and implied, including warranties or conditions of title and non-infringement, and implied warranties or conditions of merchantability and fitness for a particular purpose; + + <> effectively excludes on behalf of all other Contributors all liability for damages, including direct, indirect, special, incidental and consequential damages, such as lost profits; + + <> does not attempt to limit or alter the recipients' rights in the Source Code under section 3.2; and + + <> requires any subsequent distribution of the Program by any party to be under a license that satisfies the requirements of this section 3. + + <> When the Program is Distributed as Source Code: + + <> it must be made available under this Agreement, or if the Program (i) is combined with other material in a separate file or files made available under a Secondary License, and (ii) the initial Contributor attached to the Source Code the notice described in Exhibit A of this Agreement, then the Program may be made available under the terms of such Secondary Licenses, and + + <> a copy of this Agreement must be included with each copy of the Program. + + <> Contributors may not remove or alter any copyright, patent, trademark, attribution notices, disclaimers of warranty, or limitations of liability ("notices") contained within the Program from any copy of the Program which they Distribute, provided that Contributors may add their own appropriate notices. + + <> COMMERCIAL DISTRIBUTION + + Commercial distributors of software may accept certain responsibilities with respect to end users, business partners and the like. While this license is intended to facilitate the commercial use of the Program, the Contributor who includes the Program in a commercial product offering should do so in a manner which does not create potential liability for other Contributors. Therefore, if a Contributor includes the Program in a commercial product offering, such Contributor ("Commercial Contributor") hereby agrees to defend and indemnify every other Contributor ("Indemnified Contributor") against any losses, damages and costs (collectively "Losses") arising from claims, lawsuits and other legal actions brought by a third party against the Indemnified Contributor to the extent caused by the acts or omissions of such Commercial Contributor in connection with its distribution of the Program in a commercial product offering. The obligations in this section do not apply to any claims or Losses relating to any actual or alleged intellectual property infringement. In order to qualify, an Indemnified Contributor must: a) promptly notify the Commercial Contributor in writing of such claim, and b) allow the Commercial Contributor to control, and cooperate with the Commercial Contributor in, the defense and any related settlement negotiations. The Indemnified Contributor may participate in any such claim at its own expense. + + For example, a Contributor might include the Program in a commercial product offering, Product X. That Contributor is then a Commercial Contributor. If that Commercial Contributor then makes performance claims, or offers warranties related to Product X, those performance claims and warranties are such Commercial Contributor's responsibility alone. Under this section, the Commercial Contributor would have to defend claims against the other Contributors related to those performance claims and warranties, and if a court requires any other Contributor to pay any damages as a result, the Commercial Contributor must pay those damages. + + <> NO WARRANTY + + EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the appropriateness of using and distributing the Program and assumes all risks associated with its exercise of rights under this Agreement, including but not limited to the risks and costs of program errors, compliance with applicable laws, damage to or loss of data, programs or equipment, and unavailability or interruption of operations. + + <> DISCLAIMER OF LIABILITY + + EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT PERMITTED BY APPLICABLE LAW, NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + + <> GENERAL + + If any provision of this Agreement is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this Agreement, and without further action by the parties hereto, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable. + + If Recipient institutes patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Program itself (excluding combinations of the Program with other software or hardware) infringes such Recipient's patent(s), then such Recipient's rights granted under Section 2(b) shall terminate as of the date such litigation is filed. + + All Recipient's rights under this Agreement shall terminate if it fails to comply with any of the material terms or conditions of this Agreement and does not cure such failure in a reasonable period of time after becoming aware of such noncompliance. If all Recipient's rights under this Agreement terminate, Recipient agrees to cease use and distribution of the Program as soon as reasonably practicable. However, Recipient's obligations under this Agreement and any licenses granted by Recipient relating to the Program shall continue and survive. + + Everyone is permitted to copy and distribute copies of this Agreement, but in order to avoid inconsistency the Agreement is copyrighted and may only be modified in the following manner. The Agreement Steward reserves the right to publish new versions (including revisions) of this Agreement from time to time. No one other than the Agreement Steward has the right to modify this Agreement. The Eclipse Foundation is the initial Agreement Steward. The Eclipse Foundation may assign the responsibility to serve as the Agreement Steward to a suitable separate entity. Each new version of the Agreement will be given a distinguishing version number. The Program (including Contributions) may always be Distributed subject to the version of the Agreement under which it was received. In addition, after a new version of the Agreement is published, Contributor may elect to Distribute the Program (including its Contributions) under the new version. + + Except as expressly stated in Sections 2(a) and 2(b) above, Recipient receives no rights or licenses to the intellectual property of any Contributor under this Agreement, whether expressly, by implication, estoppel or otherwise. All rights in the Program not expressly granted under this Agreement are reserved. Nothing in this Agreement is intended to be enforceable by any entity that is not a Contributor or Recipient. No third-party beneficiary rights are created under this Agreement. + + Exhibit A - Form of Secondary Licenses Notice + + "This Source Code may also be made available under the following Secondary Licenses when the conditions for such availability set forth in the Eclipse Public License, v. 2.0 are satisfied: {name license(s), version(s), and exceptions or additional permissions here}." + + Simply including a copy of this Agreement, including this Exhibit A is not sufficient to license the Source Code under Secondary Licenses. + + If it is not possible or desirable to put the notice in a particular file, then You may include the notice in a location (such as a LICENSE file in a relevant directory) where a recipient would be likely to look for such a notice. + + You may add additional accurate notices of copyright ownership. + + \ No newline at end of file diff --git a/TestFiles/EPL-2.0.txt b/TestFiles/EPL-2.0.txt new file mode 100644 index 000000000..e23ece2c8 --- /dev/null +++ b/TestFiles/EPL-2.0.txt @@ -0,0 +1,277 @@ +Eclipse Public License - v 2.0 + + THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE + PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION + OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. + +1. DEFINITIONS + +"Contribution" means: + + a) in the case of the initial Contributor, the initial content + Distributed under this Agreement, and + + b) in the case of each subsequent Contributor: + i) changes to the Program, and + ii) additions to the Program; + where such changes and/or additions to the Program originate from + and are Distributed by that particular Contributor. A Contribution + "originates" from a Contributor if it was added to the Program by + such Contributor itself or anyone acting on such Contributor's behalf. + Contributions do not include changes or additions to the Program that + are not Modified Works. + +"Contributor" means any person or entity that Distributes the Program. + +"Licensed Patents" mean patent claims licensable by a Contributor which +are necessarily infringed by the use or sale of its Contribution alone +or when combined with the Program. + +"Program" means the Contributions Distributed in accordance with this +Agreement. + +"Recipient" means anyone who receives the Program under this Agreement +or any Secondary License (as applicable), including Contributors. + +"Derivative Works" shall mean any work, whether in Source Code or other +form, that is based on (or derived from) the Program and for which the +editorial revisions, annotations, elaborations, or other modifications +represent, as a whole, an original work of authorship. + +"Modified Works" shall mean any work in Source Code or other form that +results from an addition to, deletion from, or modification of the +contents of the Program, including, for purposes of clarity any new file +in Source Code form that contains any contents of the Program. Modified +Works shall not include works that contain only declarations, +interfaces, types, classes, structures, or files of the Program solely +in each case in order to link to, bind by name, or subclass the Program +or Modified Works thereof. + +"Distribute" means the acts of a) distributing or b) making available +in any manner that enables the transfer of a copy. + +"Source Code" means the form of a Program preferred for making +modifications, including but not limited to software source code, +documentation source, and configuration files. + +"Secondary License" means either the GNU General Public License, +Version 2.0, or any later versions of that license, including any +exceptions or additional permissions as identified by the initial +Contributor. + +2. GRANT OF RIGHTS + + a) Subject to the terms of this Agreement, each Contributor hereby + grants Recipient a non-exclusive, worldwide, royalty-free copyright + license to reproduce, prepare Derivative Works of, publicly display, + publicly perform, Distribute and sublicense the Contribution of such + Contributor, if any, and such Derivative Works. + + b) Subject to the terms of this Agreement, each Contributor hereby + grants Recipient a non-exclusive, worldwide, royalty-free patent + license under Licensed Patents to make, use, sell, offer to sell, + import and otherwise transfer the Contribution of such Contributor, + if any, in Source Code or other form. This patent license shall + apply to the combination of the Contribution and the Program if, at + the time the Contribution is added by the Contributor, such addition + of the Contribution causes such combination to be covered by the + Licensed Patents. The patent license shall not apply to any other + combinations which include the Contribution. No hardware per se is + licensed hereunder. + + c) Recipient understands that although each Contributor grants the + licenses to its Contributions set forth herein, no assurances are + provided by any Contributor that the Program does not infringe the + patent or other intellectual property rights of any other entity. + Each Contributor disclaims any liability to Recipient for claims + brought by any other entity based on infringement of intellectual + property rights or otherwise. As a condition to exercising the + rights and licenses granted hereunder, each Recipient hereby + assumes sole responsibility to secure any other intellectual + property rights needed, if any. For example, if a third party + patent license is required to allow Recipient to Distribute the + Program, it is Recipient's responsibility to acquire that license + before distributing the Program. + + d) Each Contributor represents that to its knowledge it has + sufficient copyright rights in its Contribution, if any, to grant + the copyright license set forth in this Agreement. + + e) Notwithstanding the terms of any Secondary License, no + Contributor makes additional grants to any Recipient (other than + those set forth in this Agreement) as a result of such Recipient's + receipt of the Program under the terms of a Secondary License + (if permitted under the terms of Section 3). + +3. REQUIREMENTS + +3.1 If a Contributor Distributes the Program in any form, then: + + a) the Program must also be made available as Source Code, in + accordance with section 3.2, and the Contributor must accompany + the Program with a statement that the Source Code for the Program + is available under this Agreement, and informs Recipients how to + obtain it in a reasonable manner on or through a medium customarily + used for software exchange; and + + b) the Contributor may Distribute the Program under a license + different than this Agreement, provided that such license: + i) effectively disclaims on behalf of all other Contributors all + warranties and conditions, express and implied, including + warranties or conditions of title and non-infringement, and + implied warranties or conditions of merchantability and fitness + for a particular purpose; + + ii) effectively excludes on behalf of all other Contributors all + liability for damages, including direct, indirect, special, + incidental and consequential damages, such as lost profits; + + iii) does not attempt to limit or alter the recipients' rights + in the Source Code under section 3.2; and + + iv) requires any subsequent distribution of the Program by any + party to be under a license that satisfies the requirements + of this section 3. + +3.2 When the Program is Distributed as Source Code: + + a) it must be made available under this Agreement, or if the + Program (i) is combined with other material in a separate file or + files made available under a Secondary License, and (ii) the initial + Contributor attached to the Source Code the notice described in + Exhibit A of this Agreement, then the Program may be made available + under the terms of such Secondary Licenses, and + + b) a copy of this Agreement must be included with each copy of + the Program. + +3.3 Contributors may not remove or alter any copyright, patent, +trademark, attribution notices, disclaimers of warranty, or limitations +of liability ("notices") contained within the Program from any copy of +the Program which they Distribute, provided that Contributors may add +their own appropriate notices. + +4. COMMERCIAL DISTRIBUTION + +Commercial distributors of software may accept certain responsibilities +with respect to end users, business partners and the like. While this +license is intended to facilitate the commercial use of the Program, +the Contributor who includes the Program in a commercial product +offering should do so in a manner which does not create potential +liability for other Contributors. Therefore, if a Contributor includes +the Program in a commercial product offering, such Contributor +("Commercial Contributor") hereby agrees to defend and indemnify every +other Contributor ("Indemnified Contributor") against any losses, +damages and costs (collectively "Losses") arising from claims, lawsuits +and other legal actions brought by a third party against the Indemnified +Contributor to the extent caused by the acts or omissions of such +Commercial Contributor in connection with its distribution of the Program +in a commercial product offering. The obligations in this section do not +apply to any claims or Losses relating to any actual or alleged +intellectual property infringement. In order to qualify, an Indemnified +Contributor must: a) promptly notify the Commercial Contributor in +writing of such claim, and b) allow the Commercial Contributor to control, +and cooperate with the Commercial Contributor in, the defense and any +related settlement negotiations. The Indemnified Contributor may +participate in any such claim at its own expense. + +For example, a Contributor might include the Program in a commercial +product offering, Product X. That Contributor is then a Commercial +Contributor. If that Commercial Contributor then makes performance +claims, or offers warranties related to Product X, those performance +claims and warranties are such Commercial Contributor's responsibility +alone. Under this section, the Commercial Contributor would have to +defend claims against the other Contributors related to those performance +claims and warranties, and if a court requires any other Contributor to +pay any damages as a result, the Commercial Contributor must pay +those damages. + +5. NO WARRANTY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT +PERMITTED BY APPLICABLE LAW, THE PROGRAM IS PROVIDED ON AN "AS IS" +BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR +IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF +TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR +PURPOSE. Each Recipient is solely responsible for determining the +appropriateness of using and distributing the Program and assumes all +risks associated with its exercise of rights under this Agreement, +including but not limited to the risks and costs of program errors, +compliance with applicable laws, damage to or loss of data, programs +or equipment, and unavailability or interruption of operations. + +6. DISCLAIMER OF LIABILITY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT +PERMITTED BY APPLICABLE LAW, NEITHER RECIPIENT NOR ANY CONTRIBUTORS +SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST +PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE +EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + +7. GENERAL + +If any provision of this Agreement is invalid or unenforceable under +applicable law, it shall not affect the validity or enforceability of +the remainder of the terms of this Agreement, and without further +action by the parties hereto, such provision shall be reformed to the +minimum extent necessary to make such provision valid and enforceable. + +If Recipient institutes patent litigation against any entity +(including a cross-claim or counterclaim in a lawsuit) alleging that the +Program itself (excluding combinations of the Program with other software +or hardware) infringes such Recipient's patent(s), then such Recipient's +rights granted under Section 2(b) shall terminate as of the date such +litigation is filed. + +All Recipient's rights under this Agreement shall terminate if it +fails to comply with any of the material terms or conditions of this +Agreement and does not cure such failure in a reasonable period of +time after becoming aware of such noncompliance. If all Recipient's +rights under this Agreement terminate, Recipient agrees to cease use +and distribution of the Program as soon as reasonably practicable. +However, Recipient's obligations under this Agreement and any licenses +granted by Recipient relating to the Program shall continue and survive. + +Everyone is permitted to copy and distribute copies of this Agreement, +but in order to avoid inconsistency the Agreement is copyrighted and +may only be modified in the following manner. The Agreement Steward +reserves the right to publish new versions (including revisions) of +this Agreement from time to time. No one other than the Agreement +Steward has the right to modify this Agreement. The Eclipse Foundation +is the initial Agreement Steward. The Eclipse Foundation may assign the +responsibility to serve as the Agreement Steward to a suitable separate +entity. Each new version of the Agreement will be given a distinguishing +version number. The Program (including Contributions) may always be +Distributed subject to the version of the Agreement under which it was +received. In addition, after a new version of the Agreement is published, +Contributor may elect to Distribute the Program (including its +Contributions) under the new version. + +Except as expressly stated in Sections 2(a) and 2(b) above, Recipient +receives no rights or licenses to the intellectual property of any +Contributor under this Agreement, whether expressly, by implication, +estoppel or otherwise. All rights in the Program not expressly granted +under this Agreement are reserved. Nothing in this Agreement is intended +to be enforceable by any entity that is not a Contributor or Recipient. +No third-party beneficiary rights are created under this Agreement. + +Exhibit A - Form of Secondary Licenses Notice + +"This Source Code may also be made available under the following +Secondary Licenses when the conditions for such availability set forth +in the Eclipse Public License, v. 2.0 are satisfied: {name license(s), +version(s), and exceptions or additional permissions here}." + + Simply including a copy of this Agreement, including this Exhibit A + is not sufficient to license the Source Code under Secondary Licenses. + + If it is not possible or desirable to put the notice in a particular + file, then You may include the notice in a location (such as a LICENSE + file in a relevant directory) where a recipient would be likely to + look for such a notice. + + You may add additional accurate notices of copyright ownership. \ No newline at end of file diff --git a/TestFiles/MIT.template.txt b/TestFiles/MIT.template.txt new file mode 100644 index 000000000..3e04e397f --- /dev/null +++ b/TestFiles/MIT.template.txt @@ -0,0 +1,10 @@ +<>MIT License + +<> < ";match=".{0,5000}">> + +Permission is hereby granted, free of charge, to any person obtaining a copy of <> (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice<> (including the next paragraph)<> shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL <> BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + diff --git a/TestFiles/MIT2Spaces.txt b/TestFiles/MIT2Spaces.txt new file mode 100644 index 000000000..069d6702d --- /dev/null +++ b/TestFiles/MIT2Spaces.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (C) 2020, Twilio SendGrid, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/src/main/java/org/spdx/utility/compare/LicenseCompareHelper.java b/src/main/java/org/spdx/utility/compare/LicenseCompareHelper.java index 8c8d6a014..577095c96 100644 --- a/src/main/java/org/spdx/utility/compare/LicenseCompareHelper.java +++ b/src/main/java/org/spdx/utility/compare/LicenseCompareHelper.java @@ -672,22 +672,22 @@ public static List getNonOptionalLicenseText(String licenseTemplate, Var * Creates a regular expression pattern to match the start of a license text * @param nonOptionalText List of strings of non-optional text from the license template (see {@literal List getNonOptionalLicenseText}) * @param numberOfWords Number of words to use in the match - * @return Pattern which will match the start of the license text + * @return A pair of Patterns the first of which will match the start of the license text the second of which will match the end of the license */ - public static Pattern nonOptionalTextToStartPattern(List nonOptionalText, int numberOfWords) { + public static Pair nonOptionalTextToPatterns(List nonOptionalText, int numberOfWords) { if (Objects.isNull(nonOptionalText) || nonOptionalText.size() == 0 || numberOfWords < 1) { - return Pattern.compile(""); + return new ImmutablePair<>(Pattern.compile(""), Pattern.compile("")); } int startWordCount = 0; int startTextIndex = 0; int wordsInLastLine = 0; // keep track of the number of words processed in the last start line to make sure we don't overlap words in the end lines - StringBuilder patternBuilder = new StringBuilder(); + StringBuilder startPatternBuilder = new StringBuilder(); String regexLimit = "," + Integer.toString(numberOfWords * 10) + "}"; String lastRegex = ""; while (startWordCount < numberOfWords && startTextIndex < nonOptionalText.size()) { String line = nonOptionalText.get(startTextIndex++); - if (patternBuilder.length() > 0 && line.trim().length() > 0 && !patternBuilder.toString().endsWith("}")) { - patternBuilder.append(".{0,5}"); + if (startPatternBuilder.length() > 0 && line.trim().length() > 0 && !startPatternBuilder.toString().endsWith("}")) { + startPatternBuilder.append(".{0,50}"); //TODO: Replace this with the optional text match itself - requires redesign } String[] regexSplits = line.trim().split(FilterTemplateOutputHandler.REGEX_ESCAPE); boolean inRegex = false; // if it starts with a regex, it will start with a blank line @@ -701,7 +701,7 @@ public static Pattern nonOptionalTextToStartPattern(List nonOptionalText } else { regexToAppend = regexSplit; } - if (patternBuilder.toString().endsWith("}") && regexToAppend.endsWith("}")) { + if (startPatternBuilder.toString().endsWith("}") && regexToAppend.endsWith("}")) { // collapse consecutive match anything Matcher lastRegexMatch = REGEX_QUANTIFIER_PATTERN.matcher(lastRegex); Matcher regexToAppendMatch = REGEX_QUANTIFIER_PATTERN.matcher(regexToAppend); @@ -712,12 +712,12 @@ public static Pattern nonOptionalTextToStartPattern(List nonOptionalText regexToAppend = ""; // already covered by previous regex } else { // remove the last max - patternBuilder.setLength(patternBuilder.length()-(lastRegexMatch.group(2).length()+1)); + startPatternBuilder.setLength(startPatternBuilder.length()-(lastRegexMatch.group(2).length()+1)); regexToAppend = regexToAppend.substring(regexToAppend.indexOf(',')+1); } } } - patternBuilder.append(regexToAppend); + startPatternBuilder.append(regexToAppend); lastRegex = regexToAppend; startWordCount++; inRegex = false; @@ -731,8 +731,8 @@ public static Pattern nonOptionalTextToStartPattern(List nonOptionalText if (NORMALIZE_TOKENS.containsKey(token.toLowerCase())) { token = NORMALIZE_TOKENS.get(token.toLowerCase()); } - patternBuilder.append(Pattern.quote(token)); - patternBuilder.append("\\s*"); + startPatternBuilder.append(Pattern.quote(token)); + startPatternBuilder.append("\\s*"); startWordCount++; wordsInLastLine++; } @@ -741,8 +741,9 @@ public static Pattern nonOptionalTextToStartPattern(List nonOptionalText } } } - patternBuilder.append(".{0,36000}"); + // End words + StringBuilder endPatternBuilder = new StringBuilder(); List endTextReversePattern = new ArrayList<>(); int endTextIndex = nonOptionalText.size()-1; int endWordCount = 0; @@ -752,6 +753,9 @@ public static Pattern nonOptionalTextToStartPattern(List nonOptionalText (endTextIndex == lastProcessedStartLine && (numberOfWords - endWordCount) < (nonOptionalText.get(endTextIndex).length() - wordsInLastLine)))) { // Check to make sure we're not overlapping the start words List nonEmptyTokens = new ArrayList<>(); String line = nonOptionalText.get(endTextIndex); + if (endTextReversePattern.size() > 0 && line.trim().length() > 0 && !endTextReversePattern.get(endTextReversePattern.size()-1).endsWith("}")) { + endTextReversePattern.add(".{0,50}"); //TODO: Replace this with the optional text match itself - requires redesign + } String[] regexSplits = line.trim().split(FilterTemplateOutputHandler.REGEX_ESCAPE); boolean inRegex = false; for (String regexSplit:regexSplits) { @@ -782,6 +786,9 @@ public static Pattern nonOptionalTextToStartPattern(List nonOptionalText endTextReversePattern.add(token.substring(FilterTemplateOutputHandler.REGEX_ESCAPE.length())); } else { endTextReversePattern.add("\\s*"); + if (NORMALIZE_TOKENS.containsKey(token.toLowerCase())) { + token = NORMALIZE_TOKENS.get(token.toLowerCase()); + } endTextReversePattern.add(Pattern.quote(token)); } remainingTokens--; @@ -791,9 +798,11 @@ public static Pattern nonOptionalTextToStartPattern(List nonOptionalText int revPatternIndex = endTextReversePattern.size()-1; while (revPatternIndex >= 0) { - patternBuilder.append(endTextReversePattern.get(revPatternIndex--)); + endPatternBuilder.append(endTextReversePattern.get(revPatternIndex--)); } - return Pattern.compile(patternBuilder.toString(), Pattern.DOTALL|Pattern.CASE_INSENSITIVE); + return new ImmutablePair<>( + Pattern.compile(startPatternBuilder.toString(), Pattern.DOTALL|Pattern.CASE_INSENSITIVE), + Pattern.compile(endPatternBuilder.toString(), Pattern.DOTALL|Pattern.CASE_INSENSITIVE)); } /** @@ -954,7 +963,8 @@ private static String findTemplateWithinText(String text, String template) throw templateNonOptionalText.set(0, firstLine); } } - Pattern matchPattern = nonOptionalTextToStartPattern(templateNonOptionalText, CROSS_REF_NUM_WORDS_MATCH); + Pair matchPatterns = nonOptionalTextToPatterns(templateNonOptionalText, CROSS_REF_NUM_WORDS_MATCH); + List> charPositions = new ArrayList<>(); String normalizedText = removeCommentChars(normalizeText(text)); normalizedText = normalizedText.replaceAll("(-|=|\\*){3,}", ""); // Remove ----, ***, and ==== @@ -967,13 +977,15 @@ private static String findTemplateWithinText(String text, String template) throw charPositions.add(new ImmutablePair<>(0, 0)); } - Matcher matcher = matchPattern.matcher(compareText); - if(matcher.find()) { - startIndex = findOriginalStart(matcher.start(), charPositions); - endIndex = findOriginalStart(matcher.end(), charPositions); - result = normalizedText.substring(startIndex, endIndex); + Matcher startMatcher = matchPatterns.getLeft().matcher(compareText); + if(startMatcher.find()) { + startIndex = findOriginalStart(startMatcher.start(), charPositions); + Matcher endMatcher = matchPatterns.getRight().matcher(compareText); + if (endMatcher.find()) { + endIndex = findOriginalStart(endMatcher.end(), charPositions); + result = normalizedText.substring(startIndex, endIndex); + } } - return result; } @@ -994,7 +1006,7 @@ public static boolean isStandardLicenseWithinText(String text, SpdxListedLicense try { String completeText = findTemplateWithinText(text, license.getStandardLicenseTemplate()); if (completeText != null) { - result = !isTextStandardLicense(license, completeText).isDifferenceFound(); + return !isTextStandardLicense(license, completeText).isDifferenceFound(); } } catch (SpdxCompareException e) { logger.warn("Error getting optional text for license ID " + license.getLicenseId(), e); diff --git a/src/test/java/org/spdx/utility/compare/LicenseCompareHelperTest.java b/src/test/java/org/spdx/utility/compare/LicenseCompareHelperTest.java index 3419e331a..14241fc92 100644 --- a/src/test/java/org/spdx/utility/compare/LicenseCompareHelperTest.java +++ b/src/test/java/org/spdx/utility/compare/LicenseCompareHelperTest.java @@ -81,6 +81,14 @@ public class LicenseCompareHelperTest extends TestCase { static final String POLYFORM_NC_TEMPLATE = "TestFiles" + File.separator + "PolyForm-Noncommercial-1.0.0.template.txt"; static final String APL_1_TEXT = "TestFiles" + File.separator + "APL-1.0.txt"; static final String APL_1_TEMPLATE = "TestFiles" + File.separator + "APL-1.0.template.txt"; + static final String MIT_2_SPACES = "TestFiles" + File.separator + "MIT2Spaces.txt"; + static final String MIT_TEMPLATE = "TestFiles" + File.separator + "MIT.template.txt"; + static final String BSD_3_CLAUSE_NL = "TestFiles" + File.separator + "BSD-3-Clause-newline.txt"; + static final String BSD_3_CLAUSE_TEMPLATE = "TestFiles" + File.separator + "BSD-3-Clause.template.txt"; + static final String BSD_2_CLAUSE_NL = "TestFiles" + File.separator + "BSD-2-Clause-nl.txt"; + static final String BSD_2_CLAUSE_TEMPLATE = "TestFiles" + File.separator + "BSD-2-Clause.template.txt"; + static final String EPL_2 = "TestFiles" + File.separator + "EPL-2.0.txt"; + static final String EPL_2_TEMPLATE = "TestFiles" + File.separator + "EPL-2.0.template.txt"; /** * @throws java.lang.Exception @@ -980,20 +988,72 @@ public void testRegressionAPL10() throws InvalidSPDXAnalysisException, SpdxCompa } public void testNonOptionalTextToStartPattern() throws InvalidSPDXAnalysisException, SpdxCompareException { + //TODO: Fix this up for the new return value String expectedMatch = "This is line 1\nThis is line 2"; List noRegexes = Arrays.asList(new String[] {"This is line 1", "This is line 2"}); - assertTrue(LicenseCompareHelper.nonOptionalTextToStartPattern(noRegexes, 100).matcher(expectedMatch).matches()); + assertTrue(LicenseCompareHelper.nonOptionalTextToPatterns(noRegexes, 100).getLeft().matcher(expectedMatch).matches()); List regexMiddle = Arrays.asList(new String[] {"This is~~~.+~~~1", "This is line 2"}); - assertTrue(LicenseCompareHelper.nonOptionalTextToStartPattern(regexMiddle, 100).matcher(expectedMatch).matches()); + assertTrue(LicenseCompareHelper.nonOptionalTextToPatterns(regexMiddle, 100).getLeft().matcher(expectedMatch).matches()); List regexStart = Arrays.asList(new String[] {"~~~.+~~~is line 1", "This is line 2"}); - assertTrue(LicenseCompareHelper.nonOptionalTextToStartPattern(regexStart, 100).matcher(expectedMatch).matches()); + assertTrue(LicenseCompareHelper.nonOptionalTextToPatterns(regexStart, 100).getLeft().matcher(expectedMatch).matches()); List regexEnd = Arrays.asList(new String[] {"This is line~~~.+~~~", "This is line 2"}); - assertTrue(LicenseCompareHelper.nonOptionalTextToStartPattern(regexEnd, 100).matcher(expectedMatch).matches()); + assertTrue(LicenseCompareHelper.nonOptionalTextToPatterns(regexEnd, 100).getLeft().matcher(expectedMatch).matches()); List multipleRegex = Arrays.asList(new String[] {"~~~.+~~~is line~~~.+~~~", "This is line 2"}); - assertTrue(LicenseCompareHelper.nonOptionalTextToStartPattern(multipleRegex, 100).matcher(expectedMatch).matches()); + assertTrue(LicenseCompareHelper.nonOptionalTextToPatterns(multipleRegex, 100).getLeft().matcher(expectedMatch).matches()); } + + public void test2Spaces() throws InvalidSPDXAnalysisException, SpdxCompareException, IOException { + String licText = UnitTestHelper.fileToText(MIT_2_SPACES); + String templateText = UnitTestHelper.fileToText(MIT_TEMPLATE); + SpdxListedLicense lic = new SpdxListedLicense( + new SpdxListedLicense.Builder("MIT", "MIT", licText) + .setTemplate(templateText)); + DifferenceDescription diff = LicenseCompareHelper.isTextStandardLicense(lic, licText); + if (diff.isDifferenceFound()) { + fail(diff.getDifferenceMessage()); + } + } + + public void testBsdNewLine() throws InvalidSPDXAnalysisException, SpdxCompareException, IOException { + String licText = UnitTestHelper.fileToText(BSD_3_CLAUSE_NL); + String templateText = UnitTestHelper.fileToText(BSD_3_CLAUSE_TEMPLATE); + SpdxListedLicense lic = new SpdxListedLicense( + new SpdxListedLicense.Builder("BSD-3-Clause", "BSD-3-Clause", licText) + .setTemplate(templateText)); + DifferenceDescription diff = LicenseCompareHelper.isTextStandardLicense(lic, licText); + if (diff.isDifferenceFound()) { + fail(diff.getDifferenceMessage()); + } + } + + public void testConsistentMatch() throws InvalidSPDXAnalysisException, SpdxCompareException, IOException { + String licText = UnitTestHelper.fileToText(BSD_2_CLAUSE_NL); + String templateText = UnitTestHelper.fileToText(BSD_2_CLAUSE_TEMPLATE); + SpdxListedLicense lic = new SpdxListedLicense( + new SpdxListedLicense.Builder("BSD-2-Clause", "BSD-2-Clause", licText) + .setTemplate(templateText)); + DifferenceDescription diff = LicenseCompareHelper.isTextStandardLicense(lic, licText); + if (diff.isDifferenceFound()) { + fail(diff.getDifferenceMessage()); + } + assertTrue(LicenseCompareHelper.isStandardLicenseWithinText(licText, lic)); + } + + public void testEpl20ConsistentMatch() throws InvalidSPDXAnalysisException, SpdxCompareException, IOException { + String licText = UnitTestHelper.fileToText(EPL_2); + String templateText = UnitTestHelper.fileToText(EPL_2_TEMPLATE); + SpdxListedLicense lic = new SpdxListedLicense( + new SpdxListedLicense.Builder("EPL-2.0", "EPL-2.0", licText) + .setTemplate(templateText)); + DifferenceDescription diff = LicenseCompareHelper.isTextStandardLicense(lic, licText); + if (diff.isDifferenceFound()) { + fail(diff.getDifferenceMessage()); + } + assertTrue(LicenseCompareHelper.isStandardLicenseWithinText(licText, lic)); + } + } From 5aad1e04d0935d4a02308b922179389369f52b26 Mon Sep 17 00:00:00 2001 From: Gary O'Neall Date: Fri, 8 Dec 2023 01:34:38 -0800 Subject: [PATCH 2/4] Redesign FilterTemplateOutputHandler to output optional regex Signed-off-by: Gary O'Neall --- TestFiles/GPL-2.0-NL.txt | 339 ++++++++++++++++++ TestFiles/GPL-2.0-only.template.txt | 127 +++++++ TestFiles/GPL-optional-template.txt | 33 ++ .../compare/FilterTemplateOutputHandler.java | 83 ++++- .../utility/compare/LicenseCompareHelper.java | 39 +- .../FilterTemplateOutputHandlerTest.java | 146 ++++++++ .../compare/LicenseCompareHelperTest.java | 15 + 7 files changed, 766 insertions(+), 16 deletions(-) create mode 100644 TestFiles/GPL-2.0-NL.txt create mode 100644 TestFiles/GPL-2.0-only.template.txt create mode 100644 TestFiles/GPL-optional-template.txt create mode 100644 src/test/java/org/spdx/utility/compare/FilterTemplateOutputHandlerTest.java diff --git a/TestFiles/GPL-2.0-NL.txt b/TestFiles/GPL-2.0-NL.txt new file mode 100644 index 000000000..ecbc05937 --- /dev/null +++ b/TestFiles/GPL-2.0-NL.txt @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. \ No newline at end of file diff --git a/TestFiles/GPL-2.0-only.template.txt b/TestFiles/GPL-2.0-only.template.txt new file mode 100644 index 000000000..2e7e39e14 --- /dev/null +++ b/TestFiles/GPL-2.0-only.template.txt @@ -0,0 +1,127 @@ +<>GNU GENERAL PUBLIC LICENSE + +Version 2, June 1991 + +<> + +Copyright (C) 1989, 1991 Free Software Foundation, Inc. <> + +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301<>, <> USA + +Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. + +Preamble + +The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. + +When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. + +To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. + +For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. + +We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. + +Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. + +Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. + +The precise terms and conditions for copying, distribution and modification follow. + +<> TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + <> This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". + + Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. + + <> You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. + + You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. + + <> You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: + + <> You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. + + <> You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. + + <> If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) + + These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. + + Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. + + In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. + + <> You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: + + <> Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, + + <> Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, + + <> Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) + + The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. + + If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. + + <> You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. + + <> You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. + + <> Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. + + <> If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. + + If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. + + It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. + + This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. + + <> If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. + + <> The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. + + <> If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. + + NO WARRANTY + + <> BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + <> IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.<> END OF TERMS AND CONDITIONS + +How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. + +<><<>one line to give the program's name and <> idea of what it does.<>><> + +Copyright (C)<><<> <><>> <><> <<>name of author<>><> + +This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301<>, <> USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this when it starts in an interactive mode: + +Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: + +Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. + +<><<>signature of Ty Coon<> ><>, 1 April 1989 Ty Coon, President of Vice + +<><> This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. + +<> \ No newline at end of file diff --git a/TestFiles/GPL-optional-template.txt b/TestFiles/GPL-optional-template.txt new file mode 100644 index 000000000..93128ba58 --- /dev/null +++ b/TestFiles/GPL-optional-template.txt @@ -0,0 +1,33 @@ +THE POSSIBILITY OF SUCH DAMAGES.<> END OF TERMS AND CONDITIONS + +How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. + +<><<>one line to give the program's name and <> idea of what it does.<>><> + +Copyright (C)<><<> <><>> <><> <<>name of author<>><> + +This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301<>, <> USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this when it starts in an interactive mode: + +Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: + +Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. + +<><<>signature of Ty Coon<> ><>, 1 April 1989 Ty Coon, President of Vice + +<> \ No newline at end of file diff --git a/src/main/java/org/spdx/utility/compare/FilterTemplateOutputHandler.java b/src/main/java/org/spdx/utility/compare/FilterTemplateOutputHandler.java index 18f364259..fc022789e 100644 --- a/src/main/java/org/spdx/utility/compare/FilterTemplateOutputHandler.java +++ b/src/main/java/org/spdx/utility/compare/FilterTemplateOutputHandler.java @@ -16,7 +16,11 @@ package org.spdx.utility.compare; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; import org.spdx.licenseTemplate.ILicenseTemplateOutputHandler; import org.spdx.licenseTemplate.LicenseTemplateRule; @@ -35,25 +39,41 @@ public enum VarTextHandling { REGEX, // Include the regex itself included by the REGEX_ESCAPE strings } + public enum OptionalTextHandling { + OMIT, // Omit the optional text + ORIGINAL, // Retain the optional text + REGEX_USING_TOKENS // Create a regex for the optional text with the REGEX_ESCAPE string tokenizing the words + } + private VarTextHandling varTextHandling; + private OptionalTextHandling optionalTextHandling; private List filteredText = new ArrayList<>(); StringBuilder currentString = new StringBuilder(); private int optionalDepth = 0; // depth of optional rules + private Map> optionalTokens = new HashMap<>(); // map of optional dept to a list of tokens for the optional text /** * @param includeVarText if true, include the default variable text */ @Deprecated public FilterTemplateOutputHandler(boolean includeVarText) { - this(includeVarText ? VarTextHandling.ORIGINAL : VarTextHandling.OMIT); + this(includeVarText ? VarTextHandling.ORIGINAL : VarTextHandling.OMIT, OptionalTextHandling.OMIT); } - /** * @param varTextHandling include original, exclude, or include the regex (enclosed with "~~~") for "var" text */ public FilterTemplateOutputHandler(VarTextHandling varTextHandling) { + this(varTextHandling, OptionalTextHandling.OMIT); + } + + /** + * @param varTextHandling include original, exclude, or include the regex (enclosed with "~~~") for "var" text + * @param optionalTextHandling include optional text, exclude, or include a regex for the optional text + */ + public FilterTemplateOutputHandler(VarTextHandling varTextHandling, OptionalTextHandling optionalTextHandling) { this.varTextHandling = varTextHandling; + this.optionalTextHandling = optionalTextHandling; } /* (non-Javadoc) @@ -61,8 +81,11 @@ public FilterTemplateOutputHandler(VarTextHandling varTextHandling) { */ @Override public void text(String text) { - if (optionalDepth <= 0) { + if (optionalDepth <= 0 || OptionalTextHandling.ORIGINAL.equals(optionalTextHandling)) { currentString.append(text); + } else if (OptionalTextHandling.REGEX_USING_TOKENS.equals(optionalTextHandling)) { + optionalTokens.get(optionalDepth).addAll(Arrays.asList( + LicenseCompareHelper.tokenizeLicenseText(text, new HashMap()))); } } @@ -73,10 +96,16 @@ public void text(String text) { public void variableRule(LicenseTemplateRule rule) { if (VarTextHandling.REGEX.equals(varTextHandling) && optionalDepth <= 0) { currentString.append(REGEX_ESCAPE); + currentString.append('('); currentString.append(rule.getMatch()); + currentString.append(')'); currentString.append(REGEX_ESCAPE); } else if (VarTextHandling.ORIGINAL.equals(varTextHandling) && optionalDepth <= 0) { currentString.append(rule.getOriginal()); + } else if (optionalDepth > 0 && OptionalTextHandling.REGEX_USING_TOKENS.equals(optionalTextHandling)) { + currentString.append('('); + currentString.append(rule.getMatch()); + currentString.append(')'); } else { if (currentString.length() > 0) { filteredText.add(currentString.toString()); @@ -90,19 +119,63 @@ public void variableRule(LicenseTemplateRule rule) { */ @Override public void beginOptional(LicenseTemplateRule rule) { + if (OptionalTextHandling.REGEX_USING_TOKENS.equals(optionalTextHandling)) { + if (optionalDepth == 0) { + if (currentString.length() > 0) { + filteredText.add(currentString.toString()); + currentString.setLength(0); + } + currentString.append(REGEX_ESCAPE); + } else { + currentString.append(toTokenRegex(optionalTokens.get(optionalDepth))); + optionalTokens.get(optionalDepth).clear(); + } + currentString.append('('); + } else if (currentString.length() > 0) { + filteredText.add(currentString.toString()); + currentString.setLength(0); + } optionalDepth++; + optionalTokens.put(optionalDepth, new ArrayList<>()); } + /** + * @param tokens list of tokens + * @return regular expression with quoted tokens + */ + private String toTokenRegex(List tokens) { + StringBuilder sb = new StringBuilder(); + for (String token:optionalTokens.get(optionalDepth)) { + token = token.trim(); + if (LicenseCompareHelper.NORMALIZE_TOKENS.containsKey(token.toLowerCase())) { + token = LicenseCompareHelper.NORMALIZE_TOKENS.get(token.toLowerCase()); + } + sb.append(Pattern.quote(token)); + sb.append("\\s*"); + } + return sb.toString(); + } + + /* (non-Javadoc) * @see org.spdx.licenseTemplate.ILicenseTemplateOutputHandler#endOptional(org.spdx.licenseTemplate.LicenseTemplateRule) */ @Override public void endOptional(LicenseTemplateRule rule) { - optionalDepth--; - if (optionalDepth == 0 && currentString.length() > 0) { + if (OptionalTextHandling.REGEX_USING_TOKENS.equals(optionalTextHandling)) { + currentString.append(toTokenRegex(optionalTokens.get(optionalDepth))); + currentString.append(")?"); + if (optionalDepth == 1) { + currentString.append(REGEX_ESCAPE); filteredText.add(currentString.toString()); currentString.setLength(0); + } + } else if (currentString.length() > 0) { + filteredText.add(currentString.toString()); + currentString.setLength(0); } + optionalTokens.remove(optionalDepth); + optionalDepth--; } /* (non-Javadoc) diff --git a/src/main/java/org/spdx/utility/compare/LicenseCompareHelper.java b/src/main/java/org/spdx/utility/compare/LicenseCompareHelper.java index 577095c96..fd369d032 100644 --- a/src/main/java/org/spdx/utility/compare/LicenseCompareHelper.java +++ b/src/main/java/org/spdx/utility/compare/LicenseCompareHelper.java @@ -52,6 +52,7 @@ import org.spdx.licenseTemplate.LicenseTemplateRuleException; import org.spdx.licenseTemplate.SpdxLicenseTemplateHelper; import org.spdx.utility.compare.CompareTemplateOutputHandler.DifferenceDescription; +import org.spdx.utility.compare.FilterTemplateOutputHandler.OptionalTextHandling; import org.spdx.utility.compare.FilterTemplateOutputHandler.VarTextHandling; /** @@ -646,18 +647,33 @@ private static boolean isLicenseSetsEqual(LicenseSet license1, LicenseSet licens @Deprecated public static List getNonOptionalLicenseText(String licenseTemplate, boolean includeVarText) throws SpdxCompareException { return getNonOptionalLicenseText(licenseTemplate, - includeVarText ? VarTextHandling.ORIGINAL : VarTextHandling.OMIT); + includeVarText ? VarTextHandling.ORIGINAL : VarTextHandling.OMIT, + OptionalTextHandling.OMIT); } /** - * Get the text of a license minus any optional text - note: this include the default variable text + * Get the text of a license minus any optional text * @param licenseTemplate license template containing optional and var tags * @param varTextHandling include original, exclude, or include the regex (enclosed with "~~~") for "var" text * @return list of strings for all non-optional license text. * @throws SpdxCompareException */ - public static List getNonOptionalLicenseText(String licenseTemplate, VarTextHandling varTextHandling) throws SpdxCompareException { - FilterTemplateOutputHandler filteredOutput = new FilterTemplateOutputHandler(varTextHandling); + public static List getNonOptionalLicenseText(String licenseTemplate, + VarTextHandling varTextHandling) throws SpdxCompareException { + return getNonOptionalLicenseText(licenseTemplate, varTextHandling, OptionalTextHandling.OMIT); + } + + /** + * Get the text of a license converting variable and optional text according to the options + * @param licenseTemplate license template containing optional and var tags + * @param varTextHandling include original, exclude, or include the regex (enclosed with "~~~") for "var" text + * @param optionalTextHandling include optional text, exclude, or include a regex for the optional text + * @return list of strings for all non-optional license text. + * @throws SpdxCompareException + */ + public static List getNonOptionalLicenseText(String licenseTemplate, + VarTextHandling varTextHandling, OptionalTextHandling optionalTextHandling) throws SpdxCompareException { + FilterTemplateOutputHandler filteredOutput = new FilterTemplateOutputHandler(varTextHandling, optionalTextHandling); try { SpdxLicenseTemplateHelper.parseTemplate(licenseTemplate, filteredOutput); } catch (LicenseTemplateRuleException e) { @@ -686,9 +702,9 @@ public static Pair nonOptionalTextToPatterns(List nonO String lastRegex = ""; while (startWordCount < numberOfWords && startTextIndex < nonOptionalText.size()) { String line = nonOptionalText.get(startTextIndex++); - if (startPatternBuilder.length() > 0 && line.trim().length() > 0 && !startPatternBuilder.toString().endsWith("}")) { - startPatternBuilder.append(".{0,50}"); //TODO: Replace this with the optional text match itself - requires redesign - } +// if (startPatternBuilder.length() > 0 && line.trim().length() > 0 && !startPatternBuilder.toString().endsWith("}")) { +// startPatternBuilder.append(".{0").append(regexLimit); //TODO: Replace this with the optional text match itself - requires redesign +// } String[] regexSplits = line.trim().split(FilterTemplateOutputHandler.REGEX_ESCAPE); boolean inRegex = false; // if it starts with a regex, it will start with a blank line for (String regexSplit:regexSplits) { @@ -753,9 +769,9 @@ public static Pair nonOptionalTextToPatterns(List nonO (endTextIndex == lastProcessedStartLine && (numberOfWords - endWordCount) < (nonOptionalText.get(endTextIndex).length() - wordsInLastLine)))) { // Check to make sure we're not overlapping the start words List nonEmptyTokens = new ArrayList<>(); String line = nonOptionalText.get(endTextIndex); - if (endTextReversePattern.size() > 0 && line.trim().length() > 0 && !endTextReversePattern.get(endTextReversePattern.size()-1).endsWith("}")) { - endTextReversePattern.add(".{0,50}"); //TODO: Replace this with the optional text match itself - requires redesign - } +// if (endTextReversePattern.size() > 0 && line.trim().length() > 0 && !endTextReversePattern.get(endTextReversePattern.size()-1).endsWith("}")) { +// endTextReversePattern.add(".{0,50}"); //TODO: Replace this with the optional text match itself - requires redesign +// } String[] regexSplits = line.trim().split(FilterTemplateOutputHandler.REGEX_ESCAPE); boolean inRegex = false; for (String regexSplit:regexSplits) { @@ -953,7 +969,8 @@ private static String findTemplateWithinText(String text, String template) throw return null; } - List templateNonOptionalText = getNonOptionalLicenseText(removeCommentChars(template), VarTextHandling.REGEX); + List templateNonOptionalText = getNonOptionalLicenseText(removeCommentChars(template), + VarTextHandling.REGEX, OptionalTextHandling.REGEX_USING_TOKENS); if (templateNonOptionalText.size() > 0 && templateNonOptionalText.get(0).startsWith("~~~.")) { // Change to a non-greedy match String firstLine = templateNonOptionalText.get(0); diff --git a/src/test/java/org/spdx/utility/compare/FilterTemplateOutputHandlerTest.java b/src/test/java/org/spdx/utility/compare/FilterTemplateOutputHandlerTest.java new file mode 100644 index 000000000..fad285275 --- /dev/null +++ b/src/test/java/org/spdx/utility/compare/FilterTemplateOutputHandlerTest.java @@ -0,0 +1,146 @@ +/** + * Copyright (c) 2023 Source Auditor Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.spdx.utility.compare; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.IOException; +import java.util.List; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.spdx.licenseTemplate.LicenseParserException; +import org.spdx.licenseTemplate.LicenseTemplateRuleException; +import org.spdx.licenseTemplate.SpdxLicenseTemplateHelper; +import org.spdx.utility.compare.FilterTemplateOutputHandler.OptionalTextHandling; +import org.spdx.utility.compare.FilterTemplateOutputHandler.VarTextHandling; + +/** + * @author Gary O'Neall + * + */ +public class FilterTemplateOutputHandlerTest { + + static final String LINE1 = " line 1"; + static final String LINE2 = "line 2 "; + static final String LINE3 = "line3"; + static final String LINE4 = " line 4 "; + static final String VAR_ORIGINAL = "var original line 5"; + static final String LINE6 = "line 6"; + static final String LAST_LINE = "last line"; + static final String FIRST_OPTIONAL_TOKEN_REGEX = "~~~(\\Qline\\E\\s*\\Q1\\E\\s*(\\Qline\\E\\s*\\Q2\\E\\s*)?)?~~~"; + static final String LAST_OPTIONAL_TOKEN_REGEX = "~~~(\\Qlast\\E\\s*\\Qline\\E\\s*)?~~~"; + static final String VAR_REGEX = "~~~(,|)~~~"; + static final String TEST_TEMPLATE = "<>"+LINE1+"\n" + +"<>" + LINE2 + "<>\n" + + "<>\n" + + LINE3 + "\n" + + LINE4 + "\n" + + "<> \n" + + LINE6 + "\n" + + "<>"+LAST_LINE+"<>"; + static final String GPL_TEMPLATE_SNIPPET_FILE = "TestFiles" + File.separator + "GPL-optional-template.txt"; + + /** + * @throws java.lang.Exception + */ + @Before + public void setUp() throws Exception { + } + + /** + * @throws java.lang.Exception + */ + @After + public void tearDown() throws Exception { + } + + @Test + public void testOriginal() throws LicenseParserException, LicenseTemplateRuleException { + FilterTemplateOutputHandler filteredOutput = new FilterTemplateOutputHandler(VarTextHandling.ORIGINAL, + OptionalTextHandling.ORIGINAL); + SpdxLicenseTemplateHelper.parseTemplate(TEST_TEMPLATE, filteredOutput); + List result = filteredOutput.getFilteredText(); + assertEquals(4, result.size()); + assertEquals(LINE1 + "\n", result.get(0)); + assertEquals(LINE2, result.get(1)); + assertEquals("\n" + LINE3 + "\n" + LINE4 + "\n" + VAR_ORIGINAL + " \n" + LINE6 + "\n", result.get(2)); + assertEquals(LAST_LINE, result.get(3)); + } + + @Test + public void testOptionalRegex() throws LicenseParserException, LicenseTemplateRuleException { + FilterTemplateOutputHandler filteredOutput = new FilterTemplateOutputHandler(VarTextHandling.ORIGINAL, + OptionalTextHandling.REGEX_USING_TOKENS); + SpdxLicenseTemplateHelper.parseTemplate(TEST_TEMPLATE, filteredOutput); + List result = filteredOutput.getFilteredText(); + assertEquals(3, result.size()); + assertEquals(FIRST_OPTIONAL_TOKEN_REGEX, result.get(0)); + assertEquals("\n" + LINE3 + "\n" + LINE4 + "\n" + VAR_ORIGINAL + " \n" + LINE6 + "\n", result.get(1)); + assertEquals(LAST_OPTIONAL_TOKEN_REGEX, result.get(2)); + } + + @Test + public void testNoVar() throws LicenseParserException, LicenseTemplateRuleException { + FilterTemplateOutputHandler filteredOutput = new FilterTemplateOutputHandler(VarTextHandling.OMIT, + OptionalTextHandling.ORIGINAL); + SpdxLicenseTemplateHelper.parseTemplate(TEST_TEMPLATE, filteredOutput); + List result = filteredOutput.getFilteredText(); + assertEquals(5, result.size()); + assertEquals(LINE1 + "\n", result.get(0)); + assertEquals(LINE2, result.get(1)); + assertEquals("\n" + LINE3 + "\n" + LINE4 + "\n", result.get(2)); + assertEquals(" \n" + LINE6 + "\n", result.get(3)); + assertEquals(LAST_LINE, result.get(4)); + } + + @Test + public void testNoVarRegex() throws LicenseParserException, LicenseTemplateRuleException { + FilterTemplateOutputHandler filteredOutput = new FilterTemplateOutputHandler(VarTextHandling.REGEX, + OptionalTextHandling.ORIGINAL); + SpdxLicenseTemplateHelper.parseTemplate(TEST_TEMPLATE, filteredOutput); + List result = filteredOutput.getFilteredText(); + assertEquals(4, result.size()); + assertEquals(LINE1 + "\n", result.get(0)); + assertEquals(LINE2, result.get(1)); + assertEquals("\n" + LINE3 + "\n" + LINE4 + "\n" + VAR_REGEX + " \n" + LINE6 + "\n", result.get(2)); + assertEquals(LAST_LINE, result.get(3)); + } + + @Test + public void testNoOptional() throws LicenseParserException, LicenseTemplateRuleException { + FilterTemplateOutputHandler filteredOutput = new FilterTemplateOutputHandler(VarTextHandling.ORIGINAL, + OptionalTextHandling.OMIT); + SpdxLicenseTemplateHelper.parseTemplate(TEST_TEMPLATE, filteredOutput); + List result = filteredOutput.getFilteredText(); + assertEquals(1, result.size()); + assertEquals("\n" + LINE3 + "\n" + LINE4 + "\n" + VAR_ORIGINAL + " \n" + LINE6 + "\n", result.get(0)); + } + + @Test + public void testGplRegression() throws LicenseParserException, LicenseTemplateRuleException, IOException { + FilterTemplateOutputHandler filteredOutput = new FilterTemplateOutputHandler(VarTextHandling.REGEX, + OptionalTextHandling.REGEX_USING_TOKENS); + SpdxLicenseTemplateHelper.parseTemplate(UnitTestHelper.fileToText(GPL_TEMPLATE_SNIPPET_FILE), filteredOutput); + List result = filteredOutput.getFilteredText(); + assertEquals(2, result.size()); + } + +} diff --git a/src/test/java/org/spdx/utility/compare/LicenseCompareHelperTest.java b/src/test/java/org/spdx/utility/compare/LicenseCompareHelperTest.java index 14241fc92..35bfd6a32 100644 --- a/src/test/java/org/spdx/utility/compare/LicenseCompareHelperTest.java +++ b/src/test/java/org/spdx/utility/compare/LicenseCompareHelperTest.java @@ -89,6 +89,8 @@ public class LicenseCompareHelperTest extends TestCase { static final String BSD_2_CLAUSE_TEMPLATE = "TestFiles" + File.separator + "BSD-2-Clause.template.txt"; static final String EPL_2 = "TestFiles" + File.separator + "EPL-2.0.txt"; static final String EPL_2_TEMPLATE = "TestFiles" + File.separator + "EPL-2.0.template.txt"; + static final String GPL_2_NL = "TestFiles" + File.separator + "GPL-2.0-NL.txt"; + static final String GPL_2_TEMPLATE = "TestFiles" + File.separator + "GPL-2.0-only.template.txt"; /** * @throws java.lang.Exception @@ -1056,4 +1058,17 @@ public void testEpl20ConsistentMatch() throws InvalidSPDXAnalysisException, Spdx assertTrue(LicenseCompareHelper.isStandardLicenseWithinText(licText, lic)); } + public void testGpl20ConsistentMatch() throws InvalidSPDXAnalysisException, SpdxCompareException, IOException { + String licText = UnitTestHelper.fileToText(GPL_2_TEXT); + String templateText = UnitTestHelper.fileToText(GPL_2_TEMPLATE); + SpdxListedLicense lic = new SpdxListedLicense( + new SpdxListedLicense.Builder("GPL-2.0", "GPL-2.0", licText) + .setTemplate(templateText)); + DifferenceDescription diff = LicenseCompareHelper.isTextStandardLicense(lic, licText); + if (diff.isDifferenceFound()) { + fail(diff.getDifferenceMessage()); + } + assertTrue(LicenseCompareHelper.isStandardLicenseWithinText(licText, lic)); + } + } From 3062147c3e4965d693270a0667a163db43de1ed3 Mon Sep 17 00:00:00 2001 From: Gary O'Neall Date: Sat, 9 Dec 2023 14:35:03 -0800 Subject: [PATCH 3/4] Remove commented out code Signed-off-by: Gary O'Neall --- .../java/org/spdx/utility/compare/LicenseCompareHelper.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/main/java/org/spdx/utility/compare/LicenseCompareHelper.java b/src/main/java/org/spdx/utility/compare/LicenseCompareHelper.java index fd369d032..4b9525524 100644 --- a/src/main/java/org/spdx/utility/compare/LicenseCompareHelper.java +++ b/src/main/java/org/spdx/utility/compare/LicenseCompareHelper.java @@ -702,9 +702,6 @@ public static Pair nonOptionalTextToPatterns(List nonO String lastRegex = ""; while (startWordCount < numberOfWords && startTextIndex < nonOptionalText.size()) { String line = nonOptionalText.get(startTextIndex++); -// if (startPatternBuilder.length() > 0 && line.trim().length() > 0 && !startPatternBuilder.toString().endsWith("}")) { -// startPatternBuilder.append(".{0").append(regexLimit); //TODO: Replace this with the optional text match itself - requires redesign -// } String[] regexSplits = line.trim().split(FilterTemplateOutputHandler.REGEX_ESCAPE); boolean inRegex = false; // if it starts with a regex, it will start with a blank line for (String regexSplit:regexSplits) { @@ -769,9 +766,6 @@ public static Pair nonOptionalTextToPatterns(List nonO (endTextIndex == lastProcessedStartLine && (numberOfWords - endWordCount) < (nonOptionalText.get(endTextIndex).length() - wordsInLastLine)))) { // Check to make sure we're not overlapping the start words List nonEmptyTokens = new ArrayList<>(); String line = nonOptionalText.get(endTextIndex); -// if (endTextReversePattern.size() > 0 && line.trim().length() > 0 && !endTextReversePattern.get(endTextReversePattern.size()-1).endsWith("}")) { -// endTextReversePattern.add(".{0,50}"); //TODO: Replace this with the optional text match itself - requires redesign -// } String[] regexSplits = line.trim().split(FilterTemplateOutputHandler.REGEX_ESCAPE); boolean inRegex = false; for (String regexSplit:regexSplits) { From b488e51859f7713dbd15e803e044d83d5181c33b Mon Sep 17 00:00:00 2001 From: Gary O'Neall Date: Sun, 10 Dec 2023 13:49:44 -0800 Subject: [PATCH 4/4] Minor code cleanup Signed-off-by: Gary O'Neall --- .../spdx/utility/compare/FilterTemplateOutputHandler.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/spdx/utility/compare/FilterTemplateOutputHandler.java b/src/main/java/org/spdx/utility/compare/FilterTemplateOutputHandler.java index fc022789e..8f667d453 100644 --- a/src/main/java/org/spdx/utility/compare/FilterTemplateOutputHandler.java +++ b/src/main/java/org/spdx/utility/compare/FilterTemplateOutputHandler.java @@ -85,7 +85,7 @@ public void text(String text) { currentString.append(text); } else if (OptionalTextHandling.REGEX_USING_TOKENS.equals(optionalTextHandling)) { optionalTokens.get(optionalDepth).addAll(Arrays.asList( - LicenseCompareHelper.tokenizeLicenseText(text, new HashMap()))); + LicenseCompareHelper.tokenizeLicenseText(text, new HashMap<>()))); } } @@ -140,12 +140,11 @@ public void beginOptional(LicenseTemplateRule rule) { } /** - * @param tokens list of tokens * @return regular expression with quoted tokens */ private String toTokenRegex(List tokens) { StringBuilder sb = new StringBuilder(); - for (String token:optionalTokens.get(optionalDepth)) { + for (String token:tokens) { token = token.trim(); if (LicenseCompareHelper.NORMALIZE_TOKENS.containsKey(token.toLowerCase())) { token = LicenseCompareHelper.NORMALIZE_TOKENS.get(token.toLowerCase());