Better isort integration

- `--isort` and `--diff` now work together
- diff original unmodified and user-edited versions of files in the Git directory using Python's difflib, not by parsing `git diff` output
- process each edited file individually
- only run `isort` for edited files
- write back `isort` modifications together with `black` modifications, and skip writing if there are errors
- remove code that became unused
- avoid extra conversions between source code as a string and a list of line strings
- add some tests for previously untested functions
akaihola · Jul 1, 2020 · 8a4e13e · 8a4e13e
1 parent 9adfc0b
commit 8a4e13e
Show file tree

Hide file tree

Showing 15 changed files with 564 additions and 747 deletions.
diff --git a/src/darker/__main__.py b/src/darker/__main__.py
@@ -4,17 +4,17 @@
 import sys
 from difflib import unified_diff
 from pathlib import Path
-from typing import Dict, Iterable, List, Set, Union
+from typing import Dict, Iterable, List, Union
 
-from darker.black_diff import diff_and_get_opcodes, opcodes_to_chunks, run_black
+from darker.black_diff import run_black
 from darker.chooser import choose_lines
 from darker.command_line import ISORT_INSTRUCTION, parse_command_line
-from darker.git_diff import (
-    GitDiffParseError,
-    get_edit_linenums,
-    git_diff,
-    git_diff_name_only,
+from darker.diff import (
+    diff_and_get_opcodes,
+    opcodes_to_chunks,
+    opcodes_to_edit_linenums,
 )
+from darker.git import git_diff_name_only, git_get_unmodified_content
 from darker.import_sorting import SortImports, apply_isort
 from darker.utils import get_common_root, joinlines
 from darker.verification import NotEquivalentError, verify_ast_unchanged
@@ -35,7 +35,7 @@ def format_edited_parts(
     """Black (and optional isort) formatting for chunks with edits since the last commit
 
     1. run isort on each edited file
-    2. do a ``git diff -U0 <path> ...`` for all file & dir paths on the command line
+    2. diff HEAD and worktree for all file & dir paths on the command line
     3. extract line numbers in each edited to-file for changed lines
     4. run black on the contents of each edited to-file
     5. get a diff between the edited to-file and the reformatted content
@@ -55,32 +55,43 @@ def format_edited_parts(
     :param print_diff: ``True`` to output diffs instead of modifying source files
 
     """
-    remaining_srcs: Set[Path] = set(srcs)
     git_root = get_common_root(srcs)
+    changed_files = git_diff_name_only(srcs, git_root)
+    head_srcs = {
+        src: git_get_unmodified_content(src, git_root) for src in changed_files
+    }
+    worktree_srcs = {src: (git_root / src).read_text() for src in changed_files}
 
     # 1. run isort
     if isort:
-        if print_diff:
-            raise NotImplementedError('--isort is not supported with --print-diff')
-        changed_files = git_diff_name_only(remaining_srcs, git_root)
-        apply_isort(changed_files)
-
-    for context_lines in range(MAX_CONTEXT_LINES + 1):
-
-        # 2. do the git diff
-        logger.debug("Looking at %s", ", ".join(str(s) for s in remaining_srcs))
-        logger.debug("Git root: %s", git_root)
-        git_diff_result = git_diff(remaining_srcs, git_root, context_lines)
-
-        # 3. extract changed line numbers for each to-file
-        remaining_srcs = set()
-        for src_relative, edited_linenums in get_edit_linenums(git_diff_result):
+        edited_srcs = {
+            src: apply_isort(edited_content)
+            for src, edited_content in worktree_srcs.items()
+        }
+    else:
+        edited_srcs = worktree_srcs
+
+    for src_relative, edited_content in edited_srcs.items():
+        for context_lines in range(MAX_CONTEXT_LINES + 1):
             src = git_root / src_relative
-            if not edited_linenums:
-                continue
+            edited = edited_content.splitlines()
+            head_lines = head_srcs[src_relative]
+
+            # 2. diff HEAD and worktree for all file & dir paths on the command line
+            edited_opcodes = diff_and_get_opcodes(head_lines, edited)
+
+            # 3. extract line numbers in each edited to-file for changed lines
+            edited_linenums = list(opcodes_to_edit_linenums(edited_opcodes))
+            if (
+                isort
+                and not edited_linenums
+                and edited_content == worktree_srcs[src_relative]
+            ):
+                logger.debug("No changes in %s after isort", src)
+                break
 
             # 4. run black
-            edited, formatted = run_black(src, black_args)
+            formatted = run_black(src, edited_content, black_args)
             logger.debug("Read %s lines from edited file %s", len(edited), src)
             logger.debug("Black reformat resulted in %s lines", len(formatted))
 
@@ -104,7 +115,9 @@ def format_edited_parts(
                 len(chosen_lines),
             )
             try:
-                verify_ast_unchanged(edited, result_str, black_chunks, edited_linenums)
+                verify_ast_unchanged(
+                    edited_content, result_str, black_chunks, edited_linenums
+                )
             except NotEquivalentError:
                 # Diff produced misaligned chunks which couldn't be reconstructed into
                 # a partially re-formatted Python file which produces an identical AST.
@@ -117,15 +130,18 @@ def format_edited_parts(
                     "Trying again with %s lines of context for `git diff -U`",
                     context_lines + 1,
                 )
-                remaining_srcs.add(src)
+                continue
             else:
                 # 10. A re-formatted Python file which produces an identical AST was
                 #     created successfully - write an updated file
-                logger.info("Writing %s bytes into %s", len(result_str), src)
+                #     or print the diff
                 if print_diff:
                     difflines = list(
                         unified_diff(
-                            edited, chosen_lines, src.as_posix(), src.as_posix(),
+                            worktree_srcs[src_relative].splitlines(),
+                            chosen_lines,
+                            src.as_posix(),
+                            src.as_posix(),
                         )
                     )
                     if len(difflines) > 2:
@@ -134,9 +150,9 @@ def format_edited_parts(
                         print(h2, end="")
                         print("\n".join(rest))
                 else:
+                    logger.info("Writing %s bytes into %s", len(result_str), src)
                     src.write_text(result_str)
-        if not remaining_srcs:
-            break
+                break
 
 
 def main(argv: List[str] = None) -> None:

diff --git a/src/darker/black_diff.py b/src/darker/black_diff.py
@@ -1,82 +1,41 @@
-"""Turn Python code into chunks of original and re-formatted code
-
-The functions in this module implement three steps
-for converting a file with Python source code into a list of chunks.
-From these chunks, the same file can be reconstructed
-while choosing whether each chunk should be taken from the original untouched file
-or from the version reformatted with Black.
+"""Re-format Python source code using Black
 
 In examples below, a simple two-line snippet is used.
 The first line will be reformatted by Black, and the second left intact::
 
     >>> from unittest.mock import Mock
     >>> src = Mock()
-    >>> src.read_text.return_value = '''\\
+    >>> src_content = '''\\
     ... for i in range(5): print(i)
     ... print("done")
     ... '''
 
 First, :func:`run_black` uses Black to reformat the contents of a given file.
-Original and reformatted lines are returned e.g.::
+Reformatted lines are returned e.g.::
 
-    >>> src_lines, dst_lines = run_black(src)
-    >>> src_lines
-    ['for i in range(5): print(i)',
-     'print("done")']
+    >>> dst_lines = run_black(src, src_content, black_args={})
     >>> dst_lines
     ['for i in range(5):',
      '    print(i)',
      'print("done")']
 
-The output of :func:`run_black` should then be fed into :func:`diff_and_get_opcodes`.
-It divides a diff between the original and reformatted content
-into alternating chunks of
-intact (represented by the 'equal' tag) and
-modified ('delete', 'replace' or 'insert' tag) lines.
-Each chunk is an opcode represented by the tag and the corresponding 0-based line ranges
-in the original and reformatted content, e.g.::
-
-    >>> opcodes = diff_and_get_opcodes(src_lines, dst_lines)
-    >>> len(opcodes)
-    2
-    >>> opcodes[0]  # split 'for' loop into two lines
-    ('replace', 0, 1, 0, 2)
-    >>> opcodes[1]  # keep 'print("done")' as such
-    ('equal', 1, 2, 2, 3)
-
-Finally, :func:`opcodes_to_chunks` picks the lines
-from original and reformatted content for each opcode.
-It combines line content with the 1-based line offset in the original content, e.g.::
-
-    >>> chunks = list(opcodes_to_chunks(opcodes, src_lines, dst_lines))
-    >>> len(chunks)
-    2
-    >>> chunks[0]  # (<offset in orig content>, <original lines>, <reformatted lines>)
-    (1,
-     ['for i in range(5): print(i)'],
-     ['for i in range(5):',
-      '    print(i)'])
-    >>> chunks[1]
-    (2,
-     ['print("done")'],
-     ['print("done")'])
-
-By concatenating the second items in these tuples, i.e. original lines,
-the original file can be reconstructed.
-
-By concatenating the third items, i.e. reformatted lines,
-the complete output from Black can be reconstructed.
-
-By concatenating and choosing either the second or third item,
-a mixed result with only selected regions reformatted can be reconstructed.
+See :mod:`darker.diff` and :mod:`darker.chooser`
+for how this result is further processed with:
+
+- :func:`~darker.diff.diff_and_get_opcodes`
+  to get a diff of the reformatting
+- :func:`~darker.diff.opcodes_to_chunks`
+  to split the diff into chunks of original and reformatted content
+- :func:`~darker.chooser.choose_lines`
+  to reconstruct the source code from original and reformatted chunks
+  based on whether reformats touch user-edited lines
 
 """
 
 import logging
-from difflib import SequenceMatcher
 from functools import lru_cache
 from pathlib import Path
-from typing import Dict, Generator, List, Optional, Tuple, Union
+from typing import Dict, List, Optional, Tuple, Union
 
 from black import FileMode, format_str, read_pyproject_toml
 from click import Command, Context, Option
@@ -104,12 +63,14 @@ def read_black_config(src: Path, value: Optional[str]) -> Dict[str, Union[bool,
 
 
 def run_black(
-    src: Path, black_args: Dict[str, Union[bool, int]]
-) -> Tuple[List[str], List[str]]:
-    """Run the black formatter for the contents of the given Python file
+    src: Path, src_contents: str, black_args: Dict[str, Union[bool, int]]
+) -> List[str]:
+    """Run the black formatter for the Python source code given as a string
 
     Return lines of the original file as well as the formatted content.
 
+    :param src: The originating file path for the source code
+    :param src_contents: The source code as a string
     :param black_args: Command-line arguments to send to ``black.FileMode``
 
     """
@@ -133,59 +94,6 @@ def run_black(
     # from the command line arguments
     mode = FileMode(**effective_args)
 
-    src_contents = src.read_text()
     dst_contents = format_str(src_contents, mode=mode)
-    return src_contents.splitlines(), dst_contents.splitlines()
-
-
-def diff_and_get_opcodes(
-    src_lines: List[str], dst_lines: List[str]
-) -> List[Tuple[str, int, int, int, int]]:
-    """Return opcodes and line numbers for chunks in the diff of two lists of strings
-
-    The opcodes are 5-tuples for each chunk with
-
-    - the tag of the operation ('equal', 'delete', 'replace' or 'insert')
-    - the number of the first line in the chunk in the from-file
-    - the number of the last line in the chunk in the from-file
-    - the number of the first line in the chunk in the to-file
-    - the number of the last line in the chunk in the to-file
-
-    Line numbers are zero based.
-
-    """
-    matcher = SequenceMatcher(None, src_lines, dst_lines, autojunk=False)
-    opcodes = matcher.get_opcodes()
-    logger.debug(
-        "Diff between edited and reformatted has %s opcode%s",
-        len(opcodes),
-        "s" if len(opcodes) > 1 else "",
-    )
-    return opcodes
-
-
-def opcodes_to_chunks(
-    opcodes: List[Tuple[str, int, int, int, int]],
-    src_lines: List[str],
-    dst_lines: List[str],
-) -> Generator[Tuple[int, List[str], List[str]], None, None]:
-    """Convert each diff opcode to a line number and original plus modified lines
-
-    Each chunk is a 3-tuple with
-
-    - the 1-based number of the first line in the chunk in the from-file
-    - the original lines of the chunk in the from-file
-    - the modified lines of the chunk in the to-file
-
-    Based on this, the patch can be constructed by choosing either original or modified
-    lines for each chunk and concatenating them together.
-
-    """
-    # Make sure every other opcode is an 'equal' tag
-    assert all(
-        (tag1 == "equal") != (tag2 == "equal")
-        for (tag1, _, _, _, _), (tag2, _, _, _, _) in zip(opcodes[:-1], opcodes[1:])
-    ), opcodes
-
-    for tag, i1, i2, j1, j2 in opcodes:
-        yield i1 + 1, src_lines[i1:i2], dst_lines[j1:j2]
+    dst_lines: List[str] = dst_contents.splitlines()
+    return dst_lines