Skip to content

Commit

Permalink
Update sourcemap paths when concatenating source files.
Browse files Browse the repository at this point in the history
When building a package from source files, the built source files get
concatenated together before being post-processed by Django. Prior to
Django 4.0, the post-processing step would normalize `url(...)` entries
in CSS by looking each path up in storage and replacing it with the
hashed version.

Starting in Django 4.0, post-processing would do the same for
sourcemaps. This can break when concatenating either CSS or JavaScript
files, since Pipeline may produce a built package file that's in a
different directory from one or more built source files. Django would
fail to find the file and raise an error.

We now include sourcemap normalization as part of the concatenation
process. This is using a similar approach to `url(...)` normalization,
but now consolidated into the `Compressor.concatenate()` function. This
has been updated to take arguments controlling the concatenation
process, such as a regex for capturing paths to normalize.

The regex for capturing sourcemap lines is built to be spec-compliant,
and is currently broader than what Django looks for during
post-processing. This will help avoid potential issues as Django makes
changes to their process.

The old function (`concatenate_and_rewrite()`) and the old default
behavior have been left intact, but with runtime deprecation warnings, so
that any code specializing Pipeline will continue to work. This helps
ensure this change is API-compatible and non-breaking.

See issue #808 for more details on the problem and the solution.
  • Loading branch information
chipx86 committed Mar 19, 2024
1 parent 2018c11 commit fd1c033
Show file tree
Hide file tree
Showing 5 changed files with 508 additions and 41 deletions.
218 changes: 187 additions & 31 deletions pipeline/compressors/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from __future__ import annotations

import base64
import os
import posixpath
import re
import subprocess
import warnings
from itertools import takewhile
from typing import Iterator, Optional, Sequence

from django.contrib.staticfiles.storage import staticfiles_storage
from django.utils.encoding import force_str, smart_bytes
Expand All @@ -12,8 +16,58 @@
from pipeline.exceptions import CompressorError
from pipeline.utils import relpath, set_std_streams_blocking, to_class

URL_DETECTOR = r"""url\((['"]?)\s*(.*?)\1\)"""
URL_REPLACER = r"""url\(__EMBED__(.+?)(\?\d+)?\)"""

# Regex matching url(...), url('...'), and url("...") patterns.
#
# Replacements will preserve the quotes and any whitespace contained within
# the pattern, transforming only the filename.
#
# The named groups (`url_prefix`, `url_path`, `url_suffix`) are looked up by
# name when a match is rebuilt in `Compressor.concatenate()`, so they must
# keep the `url_` prefix. The pattern relies on insignificant whitespace and
# inline comments, so it has to be compiled with `re.X`.
#
# Verbose and documented, to ease future maintenance.
_CSS_URL_REWRITE_PATH_RE_STR = r"""
(?P<url_prefix>
url\( # The opening `url(`.
(?P<url_quote>['"]?) # Optional quote (' or ").
\s*
)
(?P<url_path>.*?) # The path to capture.
(?P<url_suffix>
(?P=url_quote) # The quote found earlier, if any.
\s*
\) # The end `)`, completing `url(...)`.
)
"""


# Regex matching `//@ sourceMappingURL=...` and variants.
#
# This will capture sourceMappingURL and sourceURL keywords, both
# `//@` and `//#` variants, and both `//` and `/* ... */` comment types.
#
# Notes for maintainers:
#
# * The named groups (`sourcemap_prefix`, `sourcemap_path`,
#   `sourcemap_suffix`) are looked up by name when a match is rebuilt in
#   `Compressor.concatenate()`, so they must keep the `sourcemap_` prefix.
# * At least one whitespace character is required between the `#`/`@`
#   marker and the keyword (`[#@]\s+`).
# * The closing `*/` is only required when the comment was opened with `/*`
#   (conditional on the `sourcemap_mlcomment` group).
# * The trailing `$` only anchors at the end of each line because the
#   pattern is compiled with `re.M`; `re.X` is needed for the layout and
#   inline comments.
#
# Verbose and documented, to ease future maintenance.
_SOURCEMAP_REWRITE_PATH_RE_STR = r"""
(?P<sourcemap_prefix>
/(?:/|(?P<sourcemap_mlcomment>\*)) # Opening comment (`//#`, `//@`,
[#@]\s+ # `/*@`, `/*#`).
source(?:Mapping)?URL= # The sourcemap indicator.
\s*
)
(?P<sourcemap_path>.*?) # The path to capture.
(?P<sourcemap_suffix>
\s*
(?(sourcemap_mlcomment)\*/\s*) # End comment (`*/`)
)
$ # The line should now end.
"""


# Compiled patterns used when concatenating files.
#
# CSS needs both `url(...)` and sourcemap rewriting, so its pattern is the
# alternation of the two verbose sources. JavaScript only needs sourcemap
# rewriting. Both are compiled in verbose, multi-line mode.
CSS_REWRITE_PATH_RE = re.compile(
    f"{_CSS_URL_REWRITE_PATH_RE_STR}|{_SOURCEMAP_REWRITE_PATH_RE_STR}",
    flags=re.VERBOSE | re.MULTILINE,
)
JS_REWRITE_PATH_RE = re.compile(
    _SOURCEMAP_REWRITE_PATH_RE_STR,
    flags=re.VERBOSE | re.MULTILINE,
)


# Matches the `url(__EMBED__<path>)` placeholders (optionally followed by a
# `?<digits>` suffix) that are later substituted with data URIs.
URL_REPLACER = re.compile(r"""url\(__EMBED__(.+?)(\?\d+)?\)""")
# Matches asset paths that start with a fragment (`#`), an `http:`/`https:`
# or protocol-relative (`//`) URL, or a `data:` URI. Paths matching this are
# left untouched when rewriting.
NON_REWRITABLE_URL = re.compile(r"^(#|http:|https:|data:|//)")

DEFAULT_TEMPLATE_FUNC = "template"
Expand Down Expand Up @@ -51,9 +105,27 @@ def js_compressor(self):
def css_compressor(self):
return to_class(settings.CSS_COMPRESSOR)

def compress_js(self, paths, templates=None, **kwargs):
def compress_js(
self,
paths: Sequence[str],
templates: Optional[Sequence[str]] = None,
*,
output_filename: Optional[str] = None,
**kwargs,
) -> str:
"""Concatenate and compress JS files"""
js = self.concatenate(paths)
# Note how a semicolon is added between the two files to make sure that
# their behavior is not changed. '(expression1)\n(expression2)' calls
# `expression1` with `expression2` as an argument! Superfluous
# semicolons are valid in JavaScript and will be removed by the
# minifier.
js = self.concatenate(
paths,
file_sep=";",
output_filename=output_filename,
rewrite_path_re=JS_REWRITE_PATH_RE,
)

if templates:
js = js + self.compile_templates(templates)

Expand All @@ -68,7 +140,13 @@ def compress_js(self, paths, templates=None, **kwargs):

def compress_css(self, paths, output_filename, variant=None, **kwargs):
"""Concatenate and compress CSS files"""
css = self.concatenate_and_rewrite(paths, output_filename, variant)
css = self.concatenate(
paths,
file_sep="",
rewrite_path_re=CSS_REWRITE_PATH_RE,
output_filename=output_filename,
variant=variant,
)
compressor = self.css_compressor
if compressor:
css = getattr(compressor(verbose=self.verbose), "compress_css")(css)
Expand Down Expand Up @@ -131,38 +209,116 @@ def template_name(self, path, base):

def concatenate_and_rewrite(self, paths, output_filename, variant=None):
"""Concatenate together files and rewrite urls.

Deprecated: emits a ``DeprecationWarning`` and then delegates to
``concatenate()``, passing an empty file separator and
``CSS_REWRITE_PATH_RE`` as the path-rewriting regex.
"""
stylesheets = []
for path in paths:
# stacklevel=2 attributes the warning to the caller of this method rather
# than to this wrapper.
warnings.warn(
"Compressor.concatenate_and_rewrite() is deprecated. Please "
"call concatenate() instead.",
DeprecationWarning,
stacklevel=2,
)

# Same arguments that compress_css() passes to concatenate().
return self.concatenate(
paths=paths,
file_sep="",
rewrite_path_re=CSS_REWRITE_PATH_RE,
output_filename=output_filename,
variant=variant,
)

def reconstruct(match):
quote = match.group(1) or ""
asset_path = match.group(2)
if NON_REWRITABLE_URL.match(asset_path):
return f"url({quote}{asset_path}{quote})"
asset_url = self.construct_asset_path(
asset_path, path, output_filename, variant
def concatenate(
    self,
    paths: Sequence[str],
    *,
    file_sep: Optional[str] = None,
    output_filename: Optional[str] = None,
    rewrite_path_re: Optional[re.Pattern] = None,
    variant: Optional[str] = None,
) -> str:
    """Concatenate together a list of files.

    The caller can specify a delimiter between files and any regexes
    used to normalize relative paths. Path normalization is important for
    ensuring that local resources or sourcemaps can be updated in time
    for Django's static media post-processing phase.

    Args:
        paths: The source files to read (via ``read_text()``) and join.
        file_sep: Text placed after the newline that separates two files.
            Passing ``None`` is deprecated and falls back to ``";"``.
        output_filename: The file the concatenated result is built for.
            It is forwarded to ``construct_asset_path()``.
        rewrite_path_re: Compiled regex locating the paths to rewrite. It
            is expected to expose ``sourcemap_*`` and/or ``url_*`` named
            groups (``_prefix``, ``_path``, ``_suffix``).
        variant: Forwarded to ``construct_asset_path()``.

    Returns:
        The contents of all files joined by ``"\\n" + file_sep``.

    If ``output_filename`` or ``rewrite_path_re`` is missing, a
    ``DeprecationWarning`` is emitted and the files are joined without
    any path rewriting.
    """

    def _reconstruct(m: re.Match, source_path: str) -> str:
        """Rebuild one regex match with its asset path rewritten."""
        groups = m.groupdict()

        # Find which alternative of the pattern took part in the match.
        # Patterns lacking one of the families simply yield None here.
        for kind in ("sourcemap", "url"):
            asset_path = groups.get(f"{kind}_path")

            if asset_path is not None:
                break
        else:
            # No path group took part in the match. Return the whole
            # match as-is.
            return m.group()

        prefix, suffix = m.group(f"{kind}_prefix", f"{kind}_suffix")
        asset_path = asset_path.strip()

        # Empty paths and fragment/absolute/data URLs are kept verbatim.
        if asset_path and not NON_REWRITABLE_URL.match(asset_path):
            asset_path = self.construct_asset_path(
                asset_path=asset_path,
                source_path=source_path,
                output_filename=output_filename,
                variant=variant,
            )

        return f"{prefix}{asset_path}{suffix}"

    def _iter_files() -> Iterator[str]:
        """Return a lazy iterator over each file's (rewritten) content."""
        if not output_filename or not rewrite_path_re:
            # This is a legacy call, which does not support
            # sourcemap-aware asset rewriting. Pipeline itself won't
            # invoke this outside of tests, but it may be important for
            # third parties who are specializing these classes.
            #
            # stacklevel=3 skips this helper and concatenate() itself, so
            # the warning points at the caller of concatenate().
            warnings.warn(
                "Compressor.concatenate() was called without passing "
                "rewrite_path_re= or output_filename=. If you are "
                "specializing Compressor, please update your call "
                "to remain compatible with future changes.",
                DeprecationWarning,
                stacklevel=3,
            )

            return (self.read_text(path) for path in paths)

        # Now that we can attempt the modern support for concatenating
        # files, handling rewriting of relative assets in the process.
        # The lambda is consumed by sub() within the same iteration, so it
        # always sees the current `path`.
        return (
            rewrite_path_re.sub(
                lambda m: _reconstruct(m, path), self.read_text(path)
            )
            for path in paths
        )

    if file_sep is None:
        warnings.warn(
            "Compressor.concatenate() was called without passing "
            "file_sep=. If you are specializing Compressor, please "
            "update your call to remain compatible with future changes. "
            "Defaulting to JavaScript behavior for "
            "backwards-compatibility.",
            DeprecationWarning,
            stacklevel=2,
        )
        file_sep = ";"

    return f"\n{file_sep}".join(_iter_files())

def construct_asset_path(self, asset_path, css_path, output_filename, variant=None):
"""Return a rewritten asset URL for a stylesheet"""
def construct_asset_path(
self, asset_path, source_path, output_filename, variant=None
):
"""Return a rewritten asset URL for a stylesheet or JavaScript file."""
public_path = self.absolute_path(
asset_path,
os.path.dirname(css_path).replace("\\", "/"),
os.path.dirname(source_path).replace("\\", "/"),
)
if self.embeddable(public_path, variant):
return "__EMBED__%s" % public_path
Expand Down Expand Up @@ -196,7 +352,7 @@ def datauri(match):
data = self.encoded_content(path)
return f'url("data:{mime_type};charset=utf-8;base64,{data}")'

return re.sub(URL_REPLACER, datauri, css)
return URL_REPLACER.sub(datauri, css)

Check warning on line 355 in pipeline/compressors/__init__.py

View check run for this annotation

Codecov / codecov/patch

pipeline/compressors/__init__.py#L355

Added line #L355 was not covered by tests

def encoded_content(self, path):
"""Return the base64 encoded contents"""
Expand Down
1 change: 1 addition & 0 deletions pipeline/packager.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ def pack_javascripts(self, package, **kwargs):
package,
self.compressor.compress_js,
js_compressed,
output_filename=package.output_filename,
templates=package.templates,
**kwargs,
)
Expand Down
24 changes: 24 additions & 0 deletions tests/assets/css/sourcemap.css

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions tests/assets/js/sourcemap.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit fd1c033

Please sign in to comment.