From a95b82e6a3d227e0e1295b62de9e6c760e0d484c Mon Sep 17 00:00:00 2001
From: Yuxiang Zhu <yuxzhu@redhat.com>
Date: Fri, 11 Jun 2021 06:58:10 +0000
Subject: [PATCH] ART-3034: Implement doozer images:rebase --force-yum-updates

`--force-yum-updates` will inject `yum update -y` in each stage. This ensures the component image will be able to override RPMs it is inheriting from its parent image using RPMs in the rebuild plashet. The current user will also be switched to root in every non-final stage. If `final_stage_user` is set in image meta, the current user in the final user will be switched to `root` before `yum update -y` then switched back to `final_stage_user`.

If the option is not set, previously injected `yum update -y` commands
should be removed (in case we are injecting distgit-only repos).

If no repos are enabled for an image, we shouldn't inject `yum
update -y` otherwise the command will fail.
---
 doozerlib/cli/__main__.py                     |  14 +-
 doozerlib/distgit.py                          |  64 ++++++-
 .../test_image_distgit/test_image_distgit.py  | 165 ++++++++++++++++++
 3 files changed, 229 insertions(+), 14 deletions(-)

diff --git a/doozerlib/cli/__main__.py b/doozerlib/cli/__main__.py
index 95a227602..939439d93 100644
--- a/doozerlib/cli/__main__.py
+++ b/doozerlib/cli/__main__.py
@@ -43,7 +43,7 @@
 import json
 import functools
 import datetime
-from typing import Dict, List
+from typing import Dict, List, Optional
 import re
 import semver
 import urllib
@@ -244,10 +244,12 @@ def db_select(runtime, operation, attribute, match, like, where, sort_by, limit,
 @click.option("--repo-type", metavar="REPO_TYPE", envvar="OIT_IMAGES_REPO_TYPE",
               default="unsigned",
               help="Repo group type to use for version autodetection scan (e.g. signed, unsigned).")
+@click.option("--force-yum-updates", is_flag=True, default=False,
+              help="Inject \"yum update -y\" in each stage. This ensures the component image will be able to override RPMs it is inheriting from its parent image using RPMs in the rebuild plashet.")
 @option_commit_message
 @option_push
 @pass_runtime
-def images_update_dockerfile(runtime, version, release, repo_type, message, push):
+def images_update_dockerfile(runtime: Runtime, version: Optional[str], release: Optional[str], repo_type: str, force_yum_updates: bool, message: str, push: bool):
     """
     Updates the Dockerfile in each distgit repository with the latest metadata and
     the version/release information specified. This does not update the Dockerfile
@@ -300,7 +302,7 @@ def dgr_update(image_meta):
             dgr = image_meta.distgit_repo()
             _, prev_release, prev_private_fix = dgr.extract_version_release_private_fix()
             dgr.private_fix = bool(prev_private_fix)  # preserve the .p? field when updating the release
-            (real_version, real_release) = dgr.update_distgit_dir(version, release, prev_release)
+            (real_version, real_release) = dgr.update_distgit_dir(version, release, prev_release, force_yum_updates)
             dgr.commit(message)
             dgr.tag(real_version, real_release)
             state.record_image_success(lstate, image_meta)
@@ -500,10 +502,12 @@ def delete_images(key, value):
 @click.option("--repo-type", metavar="REPO_TYPE", envvar="OIT_IMAGES_REPO_TYPE",
               default="unsigned",
               help="Repo group type to use for version autodetection scan (e.g. signed, unsigned).")
+@click.option("--force-yum-updates", is_flag=True, default=False,
+              help="Inject \"yum update -y\" in each stage. This ensures the component image will be able to override RPMs it is inheriting from its parent image using RPMs in the rebuild plashet.")
 @option_commit_message
 @option_push
 @pass_runtime
-def images_rebase(runtime, version, release, embargoed, repo_type, message, push):
+def images_rebase(runtime: Runtime, version: Optional[str], release: Optional[str], embargoed: bool, repo_type: str, force_yum_updates: bool, message: str, push: bool):
     """
     Many of the Dockerfiles stored in distgit are based off of content managed in GitHub.
     For example, openshift-enterprise-node should always closely reflect the changes
@@ -555,7 +559,7 @@ def dgr_rebase(image_meta, terminate_event):
             dgr = image_meta.distgit_repo()
             if embargoed:
                 dgr.private_fix = True
-            (real_version, real_release) = dgr.rebase_dir(version, release, terminate_event)
+            (real_version, real_release) = dgr.rebase_dir(version, release, terminate_event, force_yum_updates)
             sha = dgr.commit(message, log_diff=True)
             dgr.tag(real_version, real_release)
             runtime.add_record(
diff --git a/doozerlib/distgit.py b/doozerlib/distgit.py
index e25ce45ae..fcbe9761f 100644
--- a/doozerlib/distgit.py
+++ b/doozerlib/distgit.py
@@ -1361,11 +1361,8 @@ def _clean_repos(self, dfp):
                 if changed:
                     dfp.add_lines_at(entry, "RUN " + new_value, replace=True)
 
-    def update_distgit_dir(self, version, release, prev_release=None):
+    def update_distgit_dir(self, version, release, prev_release=None, force_yum_updates=False):
         ignore_missing_base = self.runtime.ignore_missing_base
-        # A collection of comment lines that will be included in the generated Dockerfile. They
-        # will be prefix by the OIT_COMMENT_PREFIX and followed by newlines in the Dockerfile.
-        oit_comments = []
 
         dg_path = self.dg_path
         with Dir(self.distgit_dir):
@@ -1646,10 +1643,12 @@ def update_distgit_dir(self, version, release, prev_release=None):
                 'BUILD_RELEASE': release if release else '',
             }
 
+            df_fileobj = io.StringIO(df_content)
+            df_fileobj = self._update_yum_update_commands(force_yum_updates, df_fileobj)
+
             with dg_path.joinpath('Dockerfile').open('w', encoding="utf-8") as df:
-                for comment in oit_comments:
-                    df.write("%s %s\n" % (OIT_COMMENT_PREFIX, comment))
-                df.write(df_content)
+                shutil.copyfileobj(df_fileobj, df)
+                df_fileobj.close()
 
             self._update_environment_variables(env_vars)
 
@@ -1659,6 +1658,53 @@ def update_distgit_dir(self, version, release, prev_release=None):
 
             return version, release
 
+    def _update_yum_update_commands(self, force_yum_updates: bool, df_fileobj: io.TextIOBase) -> io.StringIO:
+        """ If force_yum_updates is True, inject "yum updates -y" in each stage; Otherwise, remove the lines we injected.
+        Returns an in-memory text stream for the new Dockerfile content
+        """
+        if force_yum_updates and not self.config.get('enabled_repos'):
+            # If no yum repos are disabled in image meta, "yum update -y" will fail with "Error: There are no enabled repositories in ...".
+            # Remove "yum update -y" lines intead.
+            self.logger.warning("Will not inject \"yum updates -y\" for this image because no yum repos are enabled.")
+            force_yum_updates = False
+        if force_yum_updates:
+            self.logger.info("Injecting \"yum updates -y\" in each stage...")
+
+        parser = DockerfileParser(fileobj=df_fileobj)
+
+        df_lines_iter = iter(parser.content.splitlines(False))
+        build_stage_num = len(parser.parent_images)
+        final_stage_user = self.metadata.config.final_stage_user or 0
+
+        yum_update_line_flag = "__doozer=yum-update"
+        yum_update_line = "RUN yum update -y && yum clean all"
+        output = io.StringIO()
+        build_stage = 0
+        for line in df_lines_iter:
+            if yum_update_line_flag in line:
+                # Remove the lines we have injected by skipping 2 lines
+                next(df_lines_iter)
+                continue
+            output.write(f'{line}\n')
+            if not force_yum_updates or not line.startswith('FROM '):
+                continue
+            build_stage += 1
+            # If the curent user inherited from the base image for this stage is not root, `yum update -y` will fail.
+            # We need to change the current user to root.
+            # For non-final stages, it should be safe to inject "USER 0" before `yum update -y`.
+            # However for the final stage, injecting "USER 0" without changing the user back may cause unexpected permission issues.
+            # Per https://github.com/openshift/doozer/pull/428#issuecomment-861795424, introduce a new metadata `final_stage_user` for images so we can switch the user back later.
+            if build_stage < build_stage_num or final_stage_user:
+                output.write(f"# {yum_update_line_flag}\nUSER 0\n")
+            else:
+                self.logger.warning("Will not inject `USER 0` before `yum update -y` for the final build stage because `final_stage_user` is missing (or 0) in image meta."
+                                    " If this build fails with `yum update -y` permission denied error, please set correct `final_stage_user` and rebase again.")
+            output.write(f"# {yum_update_line_flag}\n{yum_update_line}\n")
+            if build_stage == build_stage_num and final_stage_user:
+                output.write(f"# {yum_update_line_flag}\nUSER {final_stage_user}\n")
+        output.seek(0)
+        return output
+
     def _generate_config_digest(self):
         # The config digest is used by scan-sources to detect config changes
         self.logger.info("Calculating config digest...")
@@ -2175,7 +2221,7 @@ def extract_version_release_private_fix(self) -> Tuple[str, str, str]:
             return version, prev_release, private_fix
         return None, None, None
 
-    def rebase_dir(self, version, release, terminate_event):
+    def rebase_dir(self, version, release, terminate_event, force_yum_updates=False):
         try:
             # If this image is FROM another group member, we need to wait on that group member to determine if there are embargoes in that group member.
             image_from = Model(self.config.get('from', None))
@@ -2215,7 +2261,7 @@ def rebase_dir(self, version, release, terminate_event):
             if self.private_fix:
                 self.logger.warning("The source of this image contains embargoed fixes.")
 
-            real_version, real_release = self.update_distgit_dir(version, release, prev_release)
+            real_version, real_release = self.update_distgit_dir(version, release, prev_release, force_yum_updates)
             self.rebase_status = True
             return real_version, real_release
         except Exception:
diff --git a/tests/test_distgit/test_image_distgit/test_image_distgit.py b/tests/test_distgit/test_image_distgit/test_image_distgit.py
index a70a8047a..3e0507ccf 100644
--- a/tests/test_distgit/test_image_distgit/test_image_distgit.py
+++ b/tests/test_distgit/test_image_distgit/test_image_distgit.py
@@ -1,11 +1,19 @@
 from __future__ import absolute_import, print_function, unicode_literals
+import io
+import logging
+import pathlib
+
 import re
 import tempfile
 import unittest
 from threading import Lock
+
 import flexmock
+from mock import MagicMock, Mock
+from mock.mock import patch
 
 from doozerlib import distgit, model
+from doozerlib.image import ImageMetadata
 
 from ..support import MockScanner, TestDistgit
 
@@ -301,6 +309,163 @@ def test_image_distgit_matches_source(self):
         flexmock(self.img_dg.runtime).should_receive("detect_remote_source_branch").and_return(("branch", "eggs"))
         self.assertFalse(self.img_dg.matches_source_commit({}))
 
+    def test_inject_yum_update_commands_without_final_stage_user(self):
+        runtime = MagicMock()
+        meta = ImageMetadata(runtime, model.Model({
+            "key": "foo",
+            'data': {
+                'name': 'openshift/foo',
+                'distgit': {'branch': 'fake-branch-rhel-8'},
+                "enabled_repos": ["repo-a", "repo-b"]
+            },
+        }))
+        dg = distgit.ImageDistGitRepo(meta, autoclone=False)
+        dockerfile = """
+FROM some-base-image:some-tag AS builder
+LABEL name=value
+RUN some-command
+FROM another-base-image:some-tag
+COPY --from=builder /some/path/a /some/path/b
+        """.strip()
+        dg.logger = logging.getLogger()
+        dg.dg_path = MagicMock()
+        actual = dg._update_yum_update_commands(True, io.StringIO(dockerfile)).getvalue().strip().splitlines()
+        expected = """
+FROM some-base-image:some-tag AS builder
+# __doozer=yum-update
+USER 0
+# __doozer=yum-update
+RUN yum update -y && yum clean all
+LABEL name=value
+RUN some-command
+FROM another-base-image:some-tag
+# __doozer=yum-update
+RUN yum update -y && yum clean all
+COPY --from=builder /some/path/a /some/path/b
+        """.strip().splitlines()
+        self.assertListEqual(actual, expected)
+
+    def test_inject_yum_update_commands_with_final_stage_user(self):
+        runtime = MagicMock()
+        meta = ImageMetadata(runtime, model.Model({
+            "key": "foo",
+            'data': {
+                'name': 'openshift/foo',
+                'distgit': {'branch': 'fake-branch-rhel-8'},
+                "enabled_repos": ["repo-a", "repo-b"],
+                "final_stage_user": 1002,
+            },
+        }))
+        dg = distgit.ImageDistGitRepo(meta, autoclone=False)
+        dockerfile = """
+FROM some-base-image:some-tag AS builder
+LABEL name=value
+RUN some-command
+FROM another-base-image:some-tag
+COPY --from=builder /some/path/a /some/path/b
+        """.strip()
+        dg.logger = logging.getLogger()
+        dg.dg_path = MagicMock()
+        actual = dg._update_yum_update_commands(True, io.StringIO(dockerfile)).getvalue().strip().splitlines()
+        expected = """
+FROM some-base-image:some-tag AS builder
+# __doozer=yum-update
+USER 0
+# __doozer=yum-update
+RUN yum update -y && yum clean all
+LABEL name=value
+RUN some-command
+FROM another-base-image:some-tag
+# __doozer=yum-update
+USER 0
+# __doozer=yum-update
+RUN yum update -y && yum clean all
+# __doozer=yum-update
+USER 1002
+COPY --from=builder /some/path/a /some/path/b
+        """.strip().splitlines()
+        self.assertListEqual(actual, expected)
+
+    def test_remove_yum_update_commands(self):
+        runtime = MagicMock()
+        meta = ImageMetadata(runtime, model.Model({
+            "key": "foo",
+            'data': {
+                'name': 'openshift/foo',
+                'distgit': {'branch': 'fake-branch-rhel-8'},
+                "enabled_repos": ["repo-a", "repo-b"]
+            },
+        }))
+        dg = distgit.ImageDistGitRepo(meta, autoclone=False)
+        dg.logger = logging.getLogger()
+        dg.dg_path = MagicMock()
+        dockerfile = """
+FROM some-base-image:some-tag AS builder
+# __doozer=yum-update
+USER 0
+# __doozer=yum-update
+RUN yum update -y && yum clean all
+LABEL name=value
+RUN some-command
+FROM another-base-image:some-tag
+# __doozer=yum-update
+USER 0
+# __doozer=yum-update
+RUN yum update -y && yum clean all
+# __doozer=yum-update
+USER 1001
+COPY --from=builder /some/path/a /some/path/b
+        """.strip()
+        actual = dg._update_yum_update_commands(False, io.StringIO(dockerfile)).getvalue().strip().splitlines()
+        expected = """
+FROM some-base-image:some-tag AS builder
+LABEL name=value
+RUN some-command
+FROM another-base-image:some-tag
+COPY --from=builder /some/path/a /some/path/b
+        """.strip().splitlines()
+        self.assertListEqual(actual, expected)
+
+    def test_inject_yum_update_commands_without_repos(self):
+        runtime = MagicMock()
+        meta = ImageMetadata(runtime, model.Model({
+            "key": "foo",
+            'data': {
+                'name': 'openshift/foo',
+                'distgit': {'branch': 'fake-branch-rhel-8'},
+                "enabled_repos": []
+            },
+        }))
+        dg = distgit.ImageDistGitRepo(meta, autoclone=False)
+        dg.logger = logging.getLogger()
+        dg.dg_path = MagicMock()
+        dockerfile = """
+FROM some-base-image:some-tag AS builder
+# __doozer=yum-update
+USER 0
+# __doozer=yum-update
+RUN yum update -y && yum clean all
+LABEL name=value
+RUN some-command
+FROM another-base-image:some-tag
+# __doozer=yum-update
+USER 0
+# __doozer=yum-update
+RUN yum update -y && yum clean all
+# __doozer=yum-update
+USER 1001
+COPY --from=builder /some/path/a /some/path/b
+        """.strip()
+        actual = dg._update_yum_update_commands(True, io.StringIO(dockerfile)).getvalue().strip().splitlines()
+        expected = """
+FROM some-base-image:some-tag AS builder
+LABEL name=value
+RUN some-command
+FROM another-base-image:some-tag
+COPY --from=builder /some/path/a /some/path/b
+        """.strip().splitlines()
+        self.assertListEqual(actual, expected)
+
 
 if __name__ == "__main__":
     unittest.main()