diff --git a/coverage/config.py b/coverage/config.py index 5468ca490..bf8dbbf6f 100644 --- a/coverage/config.py +++ b/coverage/config.py @@ -250,6 +250,7 @@ def __init__(self) -> None: # Defaults for [lcov] self.lcov_output = "coverage.lcov" + self.lcov_checksums = "off" # Defaults for [paths] self.paths: dict[str, list[str]] = {} @@ -428,6 +429,7 @@ def copy(self) -> CoverageConfig: # [lcov] ("lcov_output", "lcov:output"), + ("lcov_checksums", "lcov:checksums") ] def _set_attr_from_config_option( diff --git a/coverage/lcovreport.py b/coverage/lcovreport.py index 2c8c7ca72..168e2db88 100644 --- a/coverage/lcovreport.py +++ b/coverage/lcovreport.py @@ -22,13 +22,28 @@ def line_hash(line: str) -> str: """Produce a hash of a source line for use in the LCOV file.""" - # The LCOV file format requires MD5 as a fingerprint of the file. This is - # not a security use. Some security scanners raise alarms about the use of - # MD5 here, but it is a false positive. This is not a security concern. + # The LCOV file format optionally allows each line to be MD5ed as a + # fingerprint of the file. This is not a security use. Some security + # scanners raise alarms about the use of MD5 here, but it is a false + # positive. This is not a security concern. + # The unusual encoding of the MD5 hash, as a base64 sequence with the + # trailing = signs stripped, is specified by the LCOV file format. hashed = hashlib.md5(line.encode("utf-8")).digest() return base64.b64encode(hashed).decode("ascii").rstrip("=") +def file_hash(file: str) -> str: + """Produce a hash of an entire source file for use in the LCOV file.""" + # The LCOV file format optionally allows each entire file to be + # fingerprinted, using a hash algorithm and format of the generator's + # choice. We use sha256 (unlike line hashes), with the result written out + # in base64 with trailing = signs stripped (like line hashes). 
See the + # documentation of the 'checksums' option for how to tell the LCOV tools + # to check these hashes. + hashed = hashlib.sha256(file.encode("utf-8")).digest() + return base64.b64encode(hashed).decode("ascii").rstrip("=") + + class LcovReporter: """A reporter for writing LCOV coverage reports.""" @@ -36,8 +51,14 @@ class LcovReporter: def __init__(self, coverage: Coverage) -> None: self.coverage = coverage + self.config = coverage.config + self.checksum_mode = self.config.lcov_checksums.lower().strip() self.total = Numbers(self.coverage.config.precision) + if self.checksum_mode not in ("file", "line", "off"): + raise ValueError(f"invalid configuration, checksums = {self.checksum_mode!r}" + " not understood") + def report(self, morfs: Iterable[TMorf] | None, outfile: IO[str]) -> float: """Renders the full lcov report. @@ -49,85 +70,90 @@ def report(self, morfs: Iterable[TMorf] | None, outfile: IO[str]) -> float: self.coverage.get_data() outfile = outfile or sys.stdout - for fr, analysis in get_analysis_to_report(self.coverage, morfs): - self.get_lcov(fr, analysis, outfile) + # ensure file records are sorted by the _relative_ filename, not the full path + to_report = [(fr.relative_filename(), fr, analysis) + for fr, analysis in get_analysis_to_report(self.coverage, morfs)] + to_report.sort() + + for fname, fr, analysis in to_report: + self.total += analysis.numbers + self.lcov_file(fname, fr, analysis, outfile) return self.total.n_statements and self.total.pc_covered - def get_lcov(self, fr: FileReporter, analysis: Analysis, outfile: IO[str]) -> None: + def lcov_file(self, rel_fname: str, + fr: FileReporter, analysis: Analysis, + outfile: IO[str]) -> None: """Produces the lcov data for a single file. This currently supports both line and branch coverage, however function coverage is not supported. 
""" - self.total += analysis.numbers - - outfile.write("TN:\n") - outfile.write(f"SF:{fr.relative_filename()}\n") - source_lines = fr.source().splitlines() - for covered in sorted(analysis.executed): - if covered in analysis.excluded: - # Do not report excluded as executed - continue - # Note: Coverage.py currently only supports checking *if* a line - # has been executed, not how many times, so we set this to 1 for - # nice output even if it's technically incorrect. - - # The lines below calculate a 64-bit encoded md5 hash of the line - # corresponding to the DA lines in the lcov file, for either case - # of the line being covered or missed in coverage.py. The final two - # characters of the encoding ("==") are removed from the hash to - # allow genhtml to run on the resulting lcov file. - if source_lines: - if covered-1 >= len(source_lines): - break - line = source_lines[covered-1] - else: - line = "" - outfile.write(f"DA:{covered},1,{line_hash(line)}\n") - - for missed in sorted(analysis.missing): - # We don't have to skip excluded lines here, because `missing` - # already doesn't have them. - assert source_lines - line = source_lines[missed-1] - outfile.write(f"DA:{missed},0,{line_hash(line)}\n") - - outfile.write(f"LF:{analysis.numbers.n_statements}\n") - outfile.write(f"LH:{analysis.numbers.n_executed}\n") - - # More information dense branch coverage data. - missing_arcs = analysis.missing_branch_arcs() - executed_arcs = analysis.executed_branch_arcs() - for block_number, block_line_number in enumerate( - sorted(analysis.branch_stats().keys()), - ): - for branch_number, line_number in enumerate( - sorted(missing_arcs[block_line_number]), - ): - # The exit branches have a negative line number, - # this will not produce valid lcov. Setting - # the line number of the exit branch to 0 will allow - # for valid lcov, while preserving the data. 
- line_number = max(line_number, 0) - outfile.write(f"BRDA:{line_number},{block_number},{branch_number},-\n") - - # The start value below allows for the block number to be - # preserved between these two for loops (stopping the loop from - # resetting the value of the block number to 0). - for branch_number, line_number in enumerate( - sorted(executed_arcs[block_line_number]), - start=len(missing_arcs[block_line_number]), - ): - line_number = max(line_number, 0) - outfile.write(f"BRDA:{line_number},{block_number},{branch_number},1\n") - - # Summary of the branch coverage. + if analysis.numbers.n_statements == 0: + if self.config.skip_empty: + return + + outfile.write(f"SF:{rel_fname}\n") + + source_lines = None + if self.checksum_mode == "line": + source_lines = fr.source().splitlines() + elif self.checksum_mode == "file": + outfile.write(f"VER:{file_hash(fr.source())}\n") + + # Emit a DA: record for each line of the file. + lines = sorted(analysis.statements) + hash_suffix = "" + for line in lines: + if self.checksum_mode == "line": + hash_suffix = "," + line_hash(source_lines[line-1]) + # Q: can we get info about the number of times a statement is + # executed? If so, that should be recorded here. + hit = int(line not in analysis.missing) + outfile.write(f"DA:{line},{hit}{hash_suffix}\n") + + if analysis.numbers.n_statements > 0: + outfile.write(f"LF:{analysis.numbers.n_statements}\n") + outfile.write(f"LH:{analysis.numbers.n_executed}\n") + + # More information dense branch coverage data, if available. if analysis.has_arcs: branch_stats = analysis.branch_stats() + executed_arcs = analysis.executed_branch_arcs() + missing_arcs = analysis.missing_branch_arcs() + + for line in lines: + if line in branch_stats: + # In our data, exit branches have negative destination line numbers. + # The lcov tools will reject these - but the lcov tools consider the + # destinations of branches to be opaque tokens. 
Use the absolute + # value of the destination line number as the destination block + # number, and its sign as the destination branch number. This will + # ensure destinations are unique and stable, source line numbers are + # always positive, and destination block and branch numbers are always + # nonnegative, which are the properties we need. + + # The data we have does not permit us to identify branches that were + # never *reached*, which is what "-" in the hit column means. Such + # branches aren't in either executed_arcs or missing_arcs - we don't + # even know they exist. + + # Q: can we get counts of the number of times each arc was executed? + # branch_stats has "total" and "taken" counts but it doesn't have + # "taken" broken down by destination. + arcs = [] + arcs.extend((abs(l), int(l <= 0), 1) for l in executed_arcs[line]) + arcs.extend((abs(l), int(l <= 0), 0) for l in missing_arcs[line]) + arcs.sort() + + for block, branch, hit in arcs: + outfile.write(f"BRDA:{line},{block},{branch},{hit}\n") + + # Summary of the branch coverage. brf = sum(t for t, k in branch_stats.values()) brh = brf - sum(t - k for t, k in branch_stats.values()) - outfile.write(f"BRF:{brf}\n") - outfile.write(f"BRH:{brh}\n") + if brf > 0: + outfile.write(f"BRF:{brf}\n") + outfile.write(f"BRH:{brh}\n") outfile.write("end_of_record\n") diff --git a/doc/config.rst b/doc/config.rst index 94b077d4d..45f296aed 100644 --- a/doc/config.rst +++ b/doc/config.rst @@ -890,3 +890,57 @@ Settings particular to LCOV reporting (see :ref:`cmd_lcov`). ............. (string, default "coverage.lcov") Where to write the LCOV file. + +[lcov] checksums +................ + +(one of "off", "file", or "line"; default "off") What kind of checksums to +write as part of the LCOV file. The default is to not write any checksums. + +"line" gives the historical behavior, in which an MD5 checksum is computed of +each line *containing code subject to analysis*, separately. 
Because the
+checksums do not cover the entire file and do not verify the ordering of
+lines, this mode provides only a weak assurance that the source code
+available to an analysis tool (e.g. ``genhtml``) matches the code that was
+used to generate the coverage data. It also produces larger reports
+than either of the other two modes.
+
+"file" computes an SHA-256 hash of each file, as a whole; this gives a
+stronger assurance that the source code has not changed. To validate
+the hashes emitted by "file" mode, use the following script as the
+``--version-script`` when running ``genhtml`` etc.::
+
+    #! /usr/bin/env python3
+    import argparse
+    import base64
+    import hashlib
+    import sys
+
+    def main():
+        ap = argparse.ArgumentParser()
+        ap.add_argument("--compare", action="store_true")
+        ap.add_argument("source_file_name")
+        ap.add_argument("source_file_id", nargs="?", default=None)
+        ap.add_argument("info_file_id", nargs="?", default=None)
+        args = ap.parse_args()
+
+        if args.compare:
+            if args.source_file_id is None or args.info_file_id is None:
+                ap.error("--compare mode requires source_file_id and info_file_id")
+            sys.exit(0 if args.source_file_id == args.info_file_id else 1)
+        else:
+            if args.source_file_id is not None or args.info_file_id is not None:
+                ap.error("determine mode does not use source_file_id and info_file_id")
+            with sys.stdout as ofp, \
+                 open(args.source_file_name, "rb") as ifp:
+                digest = hashlib.sha256(ifp.read()).digest()
+                file_id = base64.b64encode(digest).decode("ascii").rstrip("=")
+                ofp.write(file_id + "\n")
+            sys.exit(0)
+
+    main()
+
+Note that for either "file" or "line" mode to work correctly, all of your
+source files must be encoded using UTF-8.
+
+..
versionadded:: 7.6.2 diff --git a/tests/test_lcov.py b/tests/test_lcov.py index b2201ba8a..c062881b0 100644 --- a/tests/test_lcov.py +++ b/tests/test_lcov.py @@ -58,21 +58,75 @@ def IsItTrue(): return True """) expected_result = textwrap.dedent("""\ - TN: + SF:main_file.py + DA:1,1 + DA:2,0 + DA:4,1 + DA:5,0 + LF:4 + LH:2 + end_of_record + """) + self.assert_doesnt_exist(".coverage") + cov = coverage.Coverage(source=["."]) + self.start_import_stop(cov, "main_file") + pct = cov.lcov_report() + assert pct == 50.0 + actual_result = self.get_lcov_report_content() + assert expected_result == actual_result + + def test_line_hashing(self) -> None: + self.make_file("main_file.py", """\ + def cuboid_volume(l): + return (l*l*l) + + def IsItTrue(): + return True + """) + self.make_file(".coveragerc", "[lcov]\nchecksums = line\n") + self.assert_doesnt_exist(".coverage") + cov = coverage.Coverage(source=["."]) + self.start_import_stop(cov, "main_file") + pct = cov.lcov_report() + assert pct == 50.0 + expected_result = textwrap.dedent("""\ SF:main_file.py DA:1,1,7URou3io0zReBkk69lEb/Q - DA:4,1,ilhb4KUfytxtEuClijZPlQ DA:2,0,Xqj6H1iz/nsARMCAbE90ng + DA:4,1,ilhb4KUfytxtEuClijZPlQ DA:5,0,LWILTcvARcydjFFyo9qM0A LF:4 LH:2 end_of_record """) + actual_result = self.get_lcov_report_content() + assert expected_result == actual_result + + def test_file_hashing(self) -> None: + self.make_file("main_file.py", """\ + def cuboid_volume(l): + return (l*l*l) + + def IsItTrue(): + return True + """) + self.make_file(".coveragerc", "[lcov]\nchecksums = file\n") self.assert_doesnt_exist(".coverage") cov = coverage.Coverage(source=["."]) self.start_import_stop(cov, "main_file") pct = cov.lcov_report() assert pct == 50.0 + expected_result = textwrap.dedent("""\ + SF:main_file.py + VER:FjGZ0lkufNMCxmG+BA8yvoaqg9xdUOVKi5kpRpUs3c0 + DA:1,1 + DA:2,0 + DA:4,1 + DA:5,0 + LF:4 + LH:2 + end_of_record + """) actual_result = self.get_lcov_report_content() assert expected_result == actual_result @@ -88,25 
+142,23 @@ def test_simple_line_coverage_two_files(self) -> None: assert pct == 50.0 self.assert_exists("data.lcov") expected_result = textwrap.dedent("""\ - TN: SF:main_file.py - DA:1,1,7URou3io0zReBkk69lEb/Q - DA:4,1,ilhb4KUfytxtEuClijZPlQ - DA:2,0,Xqj6H1iz/nsARMCAbE90ng - DA:5,0,LWILTcvARcydjFFyo9qM0A + DA:1,1 + DA:2,0 + DA:4,1 + DA:5,0 LF:4 LH:2 end_of_record - TN: SF:test_file.py - DA:1,1,R5Rb4IzmjKRgY/vFFc1TRg - DA:2,1,E/tvV9JPVDhEcTCkgrwOFw - DA:4,1,GP08LPBYJq8EzYveHJy2qA - DA:5,1,MV+jSLi6PFEl+WatEAptog - DA:6,0,qyqd1mF289dg6oQAQHA+gQ - DA:7,0,nmEYd5F1KrxemgC9iVjlqg - DA:8,0,jodMK26WYDizOO1C7ekBbg - DA:9,0,LtxfKehkX8o4KvC5GnN52g + DA:1,1 + DA:2,1 + DA:4,1 + DA:5,1 + DA:6,0 + DA:7,0 + DA:8,0 + DA:9,0 LF:8 LH:4 end_of_record @@ -130,16 +182,15 @@ def is_it_x(x): assert math.isclose(pct, 16.666666666666668) self.assert_exists("coverage.lcov") expected_result = textwrap.dedent("""\ - TN: SF:main_file.py - DA:1,1,4MDXMbvwQ3L7va1tsphVzw - DA:2,0,MuERA6EYyZNpKPqoJfzwkA - DA:3,0,sAyiiE6iAuPMte9kyd0+3g - DA:5,0,W/g8GJDAYJkSSurt59Mzfw + DA:1,1 + DA:2,0 + DA:3,0 + DA:5,0 LF:4 LH:1 - BRDA:3,0,0,- - BRDA:5,0,1,- + BRDA:2,3,0,0 + BRDA:2,5,0,0 BRF:2 BRH:0 end_of_record @@ -174,31 +225,27 @@ def test_is_it_x(self): assert math.isclose(pct, 41.666666666666664) self.assert_exists("coverage.lcov") expected_result = textwrap.dedent("""\ - TN: SF:main_file.py - DA:1,1,4MDXMbvwQ3L7va1tsphVzw - DA:2,0,MuERA6EYyZNpKPqoJfzwkA - DA:3,0,sAyiiE6iAuPMte9kyd0+3g - DA:5,0,W/g8GJDAYJkSSurt59Mzfw + DA:1,1 + DA:2,0 + DA:3,0 + DA:5,0 LF:4 LH:1 - BRDA:3,0,0,- - BRDA:5,0,1,- + BRDA:2,3,0,0 + BRDA:2,5,0,0 BRF:2 BRH:0 end_of_record - TN: SF:test_file.py - DA:1,1,9TxKIyoBtmhopmlbDNa8FQ - DA:2,1,E/tvV9JPVDhEcTCkgrwOFw - DA:4,1,C3s/c8C1Yd/zoNG1GnGexg - DA:5,1,9qPyWexYysgeKtB+YvuzAg - DA:6,0,LycuNcdqoUhPXeuXUTf5lA - DA:7,0,FPTWzd68bDx76HN7VHu1wA + DA:1,1 + DA:2,1 + DA:4,1 + DA:5,1 + DA:6,0 + DA:7,0 LF:6 LH:4 - BRF:0 - BRH:0 end_of_record """) actual_result = self.get_lcov_report_content() @@ -222,16 
+269,15 @@ def test_half_covered_branch(self) -> None: assert math.isclose(pct, 66.66666666666667) self.assert_exists("coverage.lcov") expected_result = textwrap.dedent("""\ - TN: SF:main_file.py - DA:1,1,N4kbVOlkNI1rqOfCArBClw - DA:3,1,CmlqqPf0/H+R/p7/PLEXZw - DA:4,1,rE3mWnpoMq2W2sMETVk/uQ - DA:6,0,+Aov7ekIts7C96udNDVIIQ + DA:1,1 + DA:3,1 + DA:4,1 + DA:6,0 LF:4 LH:3 - BRDA:6,0,0,- - BRDA:4,0,1,1 + BRDA:3,4,0,1 + BRDA:3,6,0,0 BRF:2 BRH:1 end_of_record @@ -242,8 +288,7 @@ def test_half_covered_branch(self) -> None: def test_empty_init_files(self) -> None: # Test that in the case of an empty __init__.py file, the lcov # reporter will note that the file is there, and will note the empty - # line. It will also note the lack of branches, and the checksum for - # the line. + # line. It will also note the lack of branches. # # Although there are no lines found, it will note one line as hit in # old Pythons, and no lines hit in newer Pythons. @@ -258,23 +303,37 @@ def test_empty_init_files(self) -> None: # Newer Pythons have truly empty empty files. if env.PYBEHAVIOR.empty_is_empty: expected_result = textwrap.dedent("""\ - TN: SF:__init__.py - LF:0 - LH:0 - BRF:0 - BRH:0 end_of_record """) else: expected_result = textwrap.dedent("""\ - TN: SF:__init__.py - DA:1,1,1B2M2Y8AsgTpgAmY7PhCfg - LF:0 - LH:0 - BRF:0 - BRH:0 + DA:1,1 + end_of_record + """) + actual_result = self.get_lcov_report_content() + assert expected_result == actual_result + + def test_empty_init_file_skipped(self) -> None: + # Test that the lcov reporter honors skip_empty, when this + # is possible (see test_empty_init_files for when it isn't). + + self.make_file("__init__.py", "") + self.make_file(".coveragerc", "[report]\nskip_empty = True\n") + self.assert_doesnt_exist(".coverage") + cov = coverage.Coverage(branch=True, source=".") + self.start_import_stop(cov, "__init__") + pct = cov.lcov_report() + assert pct == 0.0 + self.assert_exists("coverage.lcov") + # Newer Pythons have truly empty empty files. 
+ if env.PYBEHAVIOR.empty_is_empty: + expected_result = "" + else: + expected_result = textwrap.dedent("""\ + SF:__init__.py + DA:1,1 end_of_record """) actual_result = self.get_lcov_report_content() @@ -299,16 +358,15 @@ def test_excluded_lines(self) -> None: self.start_import_stop(cov, "runme") cov.lcov_report() expected_result = textwrap.dedent("""\ - TN: SF:runme.py - DA:1,1,nWfwsz0pRTEJrInVF+xNvQ - DA:3,1,uV4NoIauDo5LCti6agX9sg - DA:6,1,+PfQRgSChjQOGkA6MArMDg - DA:4,0,GR4ThLStnqpcZvm3alfRaA + DA:1,1 + DA:3,1 + DA:4,0 + DA:6,1 LF:4 LH:3 - BRDA:4,0,0,- - BRDA:6,0,1,1 + BRDA:3,4,0,0 + BRDA:3,6,0,1 BRF:2 BRH:1 end_of_record diff --git a/tests/test_report_common.py b/tests/test_report_common.py index 2f0b913b6..20c54e323 100644 --- a/tests/test_report_common.py +++ b/tests/test_report_common.py @@ -268,11 +268,10 @@ def test_lcov(self) -> None: with open("coverage.lcov") as lcov: actual = lcov.read() expected = textwrap.dedent("""\ - TN: SF:good.j2 - DA:1,1,FHs1rDakj9p/NAzMCu3Kgw - DA:3,1,DGOyp8LEgI+3CcdFYw9uKQ - DA:2,0,5iUbzxp9w7peeTPjJbvmBQ + DA:1,1 + DA:2,0 + DA:3,1 LF:3 LH:2 end_of_record