Skip to content

Commit

Permalink
Better isort integration
Browse files Browse the repository at this point in the history
- `--isort` and `--diff` now work together
- diff original unmodified and user-edited versions of files in the
  Git directory using Python's difflib, not by parsing `git diff`
  output
- process each edited file individually
- only run `isort` for edited files
- write back `isort` modifications together with `black`
  modifications, and skip writing if there are errors
- remove code that became unused
- avoid extra conversions between source code as a string and a list
  of line strings
- add some tests for previously untested functions
  • Loading branch information
akaihola committed Jul 1, 2020
1 parent 9adfc0b commit 8a4e13e
Show file tree
Hide file tree
Showing 15 changed files with 564 additions and 747 deletions.
82 changes: 49 additions & 33 deletions src/darker/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@
import sys
from difflib import unified_diff
from pathlib import Path
from typing import Dict, Iterable, List, Set, Union
from typing import Dict, Iterable, List, Union

from darker.black_diff import diff_and_get_opcodes, opcodes_to_chunks, run_black
from darker.black_diff import run_black
from darker.chooser import choose_lines
from darker.command_line import ISORT_INSTRUCTION, parse_command_line
from darker.git_diff import (
GitDiffParseError,
get_edit_linenums,
git_diff,
git_diff_name_only,
from darker.diff import (
diff_and_get_opcodes,
opcodes_to_chunks,
opcodes_to_edit_linenums,
)
from darker.git import git_diff_name_only, git_get_unmodified_content
from darker.import_sorting import SortImports, apply_isort
from darker.utils import get_common_root, joinlines
from darker.verification import NotEquivalentError, verify_ast_unchanged
Expand All @@ -35,7 +35,7 @@ def format_edited_parts(
"""Black (and optional isort) formatting for chunks with edits since the last commit
1. run isort on each edited file
2. do a ``git diff -U0 <path> ...`` for all file & dir paths on the command line
2. diff HEAD and worktree for all file & dir paths on the command line
3. extract line numbers in each edited to-file for changed lines
4. run black on the contents of each edited to-file
5. get a diff between the edited to-file and the reformatted content
Expand All @@ -55,32 +55,43 @@ def format_edited_parts(
:param print_diff: ``True`` to output diffs instead of modifying source files
"""
remaining_srcs: Set[Path] = set(srcs)
git_root = get_common_root(srcs)
changed_files = git_diff_name_only(srcs, git_root)
head_srcs = {
src: git_get_unmodified_content(src, git_root) for src in changed_files
}
worktree_srcs = {src: (git_root / src).read_text() for src in changed_files}

# 1. run isort
if isort:
if print_diff:
raise NotImplementedError('--isort is not supported with --print-diff')
changed_files = git_diff_name_only(remaining_srcs, git_root)
apply_isort(changed_files)

for context_lines in range(MAX_CONTEXT_LINES + 1):

# 2. do the git diff
logger.debug("Looking at %s", ", ".join(str(s) for s in remaining_srcs))
logger.debug("Git root: %s", git_root)
git_diff_result = git_diff(remaining_srcs, git_root, context_lines)

# 3. extract changed line numbers for each to-file
remaining_srcs = set()
for src_relative, edited_linenums in get_edit_linenums(git_diff_result):
edited_srcs = {
src: apply_isort(edited_content)
for src, edited_content in worktree_srcs.items()
}
else:
edited_srcs = worktree_srcs

for src_relative, edited_content in edited_srcs.items():
for context_lines in range(MAX_CONTEXT_LINES + 1):
src = git_root / src_relative
if not edited_linenums:
continue
edited = edited_content.splitlines()
head_lines = head_srcs[src_relative]

# 2. diff HEAD and worktree for all file & dir paths on the command line
edited_opcodes = diff_and_get_opcodes(head_lines, edited)

# 3. extract line numbers in each edited to-file for changed lines
edited_linenums = list(opcodes_to_edit_linenums(edited_opcodes))
if (
isort
and not edited_linenums
and edited_content == worktree_srcs[src_relative]
):
logger.debug("No changes in %s after isort", src)
break

# 4. run black
edited, formatted = run_black(src, black_args)
formatted = run_black(src, edited_content, black_args)
logger.debug("Read %s lines from edited file %s", len(edited), src)
logger.debug("Black reformat resulted in %s lines", len(formatted))

Expand All @@ -104,7 +115,9 @@ def format_edited_parts(
len(chosen_lines),
)
try:
verify_ast_unchanged(edited, result_str, black_chunks, edited_linenums)
verify_ast_unchanged(
edited_content, result_str, black_chunks, edited_linenums
)
except NotEquivalentError:
# Diff produced misaligned chunks which couldn't be reconstructed into
# a partially re-formatted Python file which produces an identical AST.
Expand All @@ -117,15 +130,18 @@ def format_edited_parts(
"Trying again with %s lines of context for `git diff -U`",
context_lines + 1,
)
remaining_srcs.add(src)
continue
else:
# 10. A re-formatted Python file which produces an identical AST was
# created successfully - write an updated file
logger.info("Writing %s bytes into %s", len(result_str), src)
# or print the diff
if print_diff:
difflines = list(
unified_diff(
edited, chosen_lines, src.as_posix(), src.as_posix(),
worktree_srcs[src_relative].splitlines(),
chosen_lines,
src.as_posix(),
src.as_posix(),
)
)
if len(difflines) > 2:
Expand All @@ -134,9 +150,9 @@ def format_edited_parts(
print(h2, end="")
print("\n".join(rest))
else:
logger.info("Writing %s bytes into %s", len(result_str), src)
src.write_text(result_str)
if not remaining_srcs:
break
break


def main(argv: List[str] = None) -> None:
Expand Down
136 changes: 22 additions & 114 deletions src/darker/black_diff.py
Original file line number Diff line number Diff line change
@@ -1,82 +1,41 @@
"""Turn Python code into chunks of original and re-formatted code
The functions in this module implement three steps
for converting a file with Python source code into a list of chunks.
From these chunks, the same file can be reconstructed
while choosing whether each chunk should be taken from the original untouched file
or from the version reformatted with Black.
"""Re-format Python source code using Black
In examples below, a simple two-line snippet is used.
The first line will be reformatted by Black, and the second left intact::
>>> from unittest.mock import Mock
>>> src = Mock()
>>> src.read_text.return_value = '''\\
>>> src_content = '''\\
... for i in range(5): print(i)
... print("done")
... '''
First, :func:`run_black` uses Black to reformat the contents of a given file.
Original and reformatted lines are returned e.g.::
Reformatted lines are returned e.g.::
>>> src_lines, dst_lines = run_black(src)
>>> src_lines
['for i in range(5): print(i)',
'print("done")']
>>> dst_lines = run_black(src, src_content, black_args={})
>>> dst_lines
['for i in range(5):',
' print(i)',
'print("done")']
The output of :func:`run_black` should then be fed into :func:`diff_and_get_opcodes`.
It divides a diff between the original and reformatted content
into alternating chunks of
intact (represented by the 'equal' tag) and
modified ('delete', 'replace' or 'insert' tag) lines.
Each chunk is an opcode represented by the tag and the corresponding 0-based line ranges
in the original and reformatted content, e.g.::
>>> opcodes = diff_and_get_opcodes(src_lines, dst_lines)
>>> len(opcodes)
2
>>> opcodes[0] # split 'for' loop into two lines
('replace', 0, 1, 0, 2)
>>> opcodes[1] # keep 'print("done")' as such
('equal', 1, 2, 2, 3)
Finally, :func:`opcodes_to_chunks` picks the lines
from original and reformatted content for each opcode.
It combines line content with the 1-based line offset in the original content, e.g.::
>>> chunks = list(opcodes_to_chunks(opcodes, src_lines, dst_lines))
>>> len(chunks)
2
>>> chunks[0] # (<offset in orig content>, <original lines>, <reformatted lines>)
(1,
['for i in range(5): print(i)'],
['for i in range(5):',
' print(i)'])
>>> chunks[1]
(2,
['print("done")'],
['print("done")'])
By concatenating the second items in these tuples, i.e. original lines,
the original file can be reconstructed.
By concatenating the third items, i.e. reformatted lines,
the complete output from Black can be reconstructed.
By concatenating and choosing either the second or third item,
a mixed result with only selected regions reformatted can be reconstructed.
See :mod:`darker.diff` and :mod:`darker.chooser`
for how this result is further processed with:
- :func:`~darker.diff.diff_and_get_opcodes`
to get a diff of the reformatting
- :func:`~darker.diff.opcodes_to_chunks`
to split the diff into chunks of original and reformatted content
- :func:`~darker.chooser.choose_lines`
to reconstruct the source code from original and reformatted chunks
based on whether reformats touch user-edited lines
"""

import logging
from difflib import SequenceMatcher
from functools import lru_cache
from pathlib import Path
from typing import Dict, Generator, List, Optional, Tuple, Union
from typing import Dict, List, Optional, Tuple, Union

from black import FileMode, format_str, read_pyproject_toml
from click import Command, Context, Option
Expand Down Expand Up @@ -104,12 +63,14 @@ def read_black_config(src: Path, value: Optional[str]) -> Dict[str, Union[bool,


def run_black(
src: Path, black_args: Dict[str, Union[bool, int]]
) -> Tuple[List[str], List[str]]:
"""Run the black formatter for the contents of the given Python file
src: Path, src_contents: str, black_args: Dict[str, Union[bool, int]]
) -> List[str]:
"""Run the black formatter for the Python source code given as a string
Return lines of the original file as well as the formatted content.
:param src: The originating file path for the source code
:param src_contents: The source code as a string
:param black_args: Command-line arguments to send to ``black.FileMode``
"""
Expand All @@ -133,59 +94,6 @@ def run_black(
# from the command line arguments
mode = FileMode(**effective_args)

src_contents = src.read_text()
dst_contents = format_str(src_contents, mode=mode)
return src_contents.splitlines(), dst_contents.splitlines()


def diff_and_get_opcodes(
    src_lines: List[str], dst_lines: List[str]
) -> List[Tuple[str, int, int, int, int]]:
    """Return opcodes and line indices for chunks in the diff of two lists of strings

    Each opcode is a 5-tuple ``(tag, i1, i2, j1, j2)`` describing one chunk:

    - ``tag``: the operation ('equal', 'delete', 'replace' or 'insert')
    - ``i1``: index of the first line of the chunk in the from-file
    - ``i2``: index one past the last line of the chunk in the from-file
    - ``j1``: index of the first line of the chunk in the to-file
    - ``j2``: index one past the last line of the chunk in the to-file

    Indices are zero based and the end indices are exclusive, so the lines of a chunk
    are ``src_lines[i1:i2]`` and ``dst_lines[j1:j2]``.

    :param src_lines: Lines of the from-file
    :param dst_lines: Lines of the to-file
    :return: The opcodes as returned by :meth:`difflib.SequenceMatcher.get_opcodes`

    """
    # ``autojunk=False`` disables difflib's automatic junk heuristic so that
    # frequently repeated lines are still considered when matching.
    matcher = SequenceMatcher(None, src_lines, dst_lines, autojunk=False)
    opcodes = matcher.get_opcodes()
    logger.debug(
        "Diff between edited and reformatted has %s opcode%s",
        len(opcodes),
        # Only exactly one opcode is singular; zero opcodes is plural, too.
        "" if len(opcodes) == 1 else "s",
    )
    return opcodes


def opcodes_to_chunks(
    opcodes: List[Tuple[str, int, int, int, int]],
    src_lines: List[str],
    dst_lines: List[str],
) -> Generator[Tuple[int, List[str], List[str]], None, None]:
    """Convert each diff opcode to a line number and original plus modified lines

    Each chunk is a 3-tuple with

    - the 1-based number of the first line in the chunk in the from-file
    - the original lines of the chunk in the from-file
    - the modified lines of the chunk in the to-file

    Based on this, the patch can be constructed by choosing either original or modified
    lines for each chunk and concatenating them together.

    :param opcodes: 5-tuples ``(tag, i1, i2, j1, j2)`` with zero-based, end-exclusive
                    line indices into ``src_lines`` and ``dst_lines``
    :param src_lines: Lines of the from-file
    :param dst_lines: Lines of the to-file
    :raises AssertionError: If two consecutive opcodes are both 'equal' or both
                            non-'equal'. Since this is a generator, the check runs
                            when the first chunk is requested.

    """
    # Make sure every other opcode is an 'equal' tag
    assert all(
        (tag1 == "equal") != (tag2 == "equal")
        for (tag1, _, _, _, _), (tag2, _, _, _, _) in zip(opcodes[:-1], opcodes[1:])
    ), opcodes

    # The tag itself is not needed here: the caller decides per chunk whether to use
    # the original or the modified lines.
    for _tag, i1, i2, j1, j2 in opcodes:
        # Opcode indices are 0-based; chunk offsets are 1-based line numbers.
        yield i1 + 1, src_lines[i1:i2], dst_lines[j1:j2]
dst_lines: List[str] = dst_contents.splitlines()
return dst_lines
Loading

0 comments on commit 8a4e13e

Please sign in to comment.