Skip to content
This repository has been archived by the owner on Apr 1, 2022. It is now read-only.

Container scan (Analyze/Test w/ upload) #173

Merged
merged 16 commits into from
Jan 14, 2021
Merged
1 change: 1 addition & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ jobs:
run: |
mkdir vendor
touch vendor/wiggins
touch vendor/syft

- name: Build
run: |
Expand Down
7 changes: 5 additions & 2 deletions spectrometer.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,17 @@ library
App.Fossa.Analyze.GraphBuilder
App.Fossa.Analyze.GraphMangler
App.Fossa.Analyze.Project
App.Fossa.Container
App.Fossa.Container.Analyze
App.Fossa.Container.Test
App.Fossa.EmbeddedBinary
App.Fossa.FossaAPIV1
App.Fossa.ListTargets
App.Fossa.Main
App.Fossa.ProjectInference
App.Fossa.Report
App.Fossa.Report.Attribution
App.Fossa.Test
App.Fossa.VPS.EmbeddedBinary
App.Fossa.VPS.NinjaGraph
App.Fossa.VPS.Report
App.Fossa.VPS.Scan
Expand Down Expand Up @@ -218,7 +221,6 @@ test-suite unit-tests

-- cabal-fmt: expand test
other-modules:
App.Fossa.FossaAPIV1Spec
App.Fossa.Report.AttributionSpec
App.Fossa.VPS.NinjaGraphSpec
Cargo.MetadataSpec
Expand All @@ -232,6 +234,7 @@ test-suite unit-tests
Erlang.ConfigParserSpec
Erlang.Rebar3TreeSpec
Extra.TextSpec
Fossa.API.TypesSpec
Go.GlideLockSpec
Go.GoListSpec
Go.GomodSpec
Expand Down
6 changes: 3 additions & 3 deletions src/App/Fossa/API/BuildWait.hs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import Control.Effect.Lift (Lift, sendIO)
import Data.Functor (($>))
import Data.Text (Text)
import Effect.Logger
import Fossa.API.Types (ApiOpts)
import Fossa.API.Types (ApiOpts, Issues (..))

pollDelaySeconds :: Int
pollDelaySeconds = 8
Expand Down Expand Up @@ -53,10 +53,10 @@ waitForIssues ::
(Has Diagnostics sig m, Has (Lift IO) sig m, Has Logger sig m) =>
ApiOpts ->
ProjectRevision ->
m Fossa.Issues
m Issues
waitForIssues apiOpts revision = do
issues <- Fossa.getIssues apiOpts revision
case Fossa.issuesStatus issues of
case issuesStatus issues of
"WAITING" -> do
sendIO $ threadDelay (pollDelaySeconds * 1_000_000)
waitForIssues apiOpts revision
Expand Down
1 change: 1 addition & 0 deletions src/App/Fossa/Analyze.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ module App.Fossa.Analyze
, ScanDestination(..)
, UnpackArchives(..)
, discoverFuncs
, fossaProjectUrl
) where

import App.Fossa.Analyze.GraphMangler (graphingToGraph)
Expand Down
208 changes: 208 additions & 0 deletions src/App/Fossa/Container.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TypeApplications #-}

module App.Fossa.Container
( ImageText (..),
imageTextArg,
Locator (..),
SyftResponse (..),
ResponseArtifact (..),
ResponseSource (..),
ResponseDistro (..),
SourceTarget (..),
ContainerScan (..),
ContainerImage (..),
ContainerArtifact (..),
runSyft,
toContainerScan,
extractRevision,
)
where

import App.Fossa.EmbeddedBinary (BinaryPaths, toExecutablePath, withSyftBinary)
import App.Types (ProjectRevision (..), OverrideProject (..))
import Control.Effect.Diagnostics hiding (fromMaybe)
import Control.Effect.Lift (Lift)
import Control.Monad.IO.Class
import Data.Aeson
import qualified Data.Map.Lazy as LMap
import Data.Map.Strict (Map)
import Data.Maybe (listToMaybe, fromMaybe)
import Data.Text (Text, pack)
import Data.Text.Extra (breakOnAndRemove)
import Effect.Exec (AllowErr (Never), Command (..), execJson, runExecIO)
import Options.Applicative (Parser, argument, help, metavar, str)
import Path

newtype ImageText = ImageText {unImageText :: Text} deriving (Show, Eq, Ord)

imageTextArg :: Parser ImageText
imageTextArg = ImageText . pack <$> argument str (metavar "IMAGE" <> help "The image to scan")

newtype Locator = Locator {unLocator :: Text} deriving (Eq, Ord, Show)

-- | The output of the syft binary
data SyftResponse
= SyftResponse
{ responseArtifacts :: [ResponseArtifact],
responseSource :: ResponseSource,
responseDistro :: ResponseDistro
}

instance FromJSON SyftResponse where
parseJSON = withObject "SyftResponse" $ \obj ->
SyftResponse <$> obj .: "artifacts"
<*> obj .: "source"
<*> obj .: "distro"

data ResponseArtifact
= ResponseArtifact
{ artifactName :: Text,
artifactVersion :: Text,
artifactType :: Text,
artifactPkgUrl :: Text,
artifactMetadataType :: Text,
artifactMetadata :: Map Text Value
}

instance FromJSON ResponseArtifact where
parseJSON = withObject "ResponseArtifact" $ \obj ->
ResponseArtifact <$> obj .: "name"
<*> obj .: "version"
<*> obj .: "type"
<*> obj .: "purl"
<*> obj .: "metadataType"
-- We delete "files" as early as possible, which reduces
Copy link
Member

@zlav zlav Jan 7, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you elaborate on what a file is? I get why we need a comment here but I don't really understand what it is saying. I'm guessing this may make sense to me as I read further.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's some output from the syft binary that is under the nested json key artifacts[n].metadata.files.

{
  "artifacts": [
    {
      "metadata": {
        "files": [
          // enormous array of info we don't need
        ]
      }
    }
  ]
}

Since this value is unnecessary AND huge, we take steps to prevent loading it into memory as much as possible. Since haskell lazily evaluates data by default, and the Data.Map and Data.Map.Lazy internal map structure are strict in keys, but lazy in values, we can potentially remove the files key from this map without ever trying to hold the entire array in memory at once.

The comment is saying "we retain the json-like map structure, but we want to ignore the 'files' field to save a ton of memory."

-- the size by over 95% in many cases.
-- We use Lazy delete to avoid evaluating the innards of
-- the field, since Aeson will try to avoid evaluating it
-- as well.
<*> (LMap.delete "files" <$> obj .: "metadata")

newtype ResponseSource
= ResponseSource
{sourceTarget :: SourceTarget}

instance FromJSON ResponseSource where
parseJSON = withObject "ResponseSource" $ \obj ->
ResponseSource <$> obj .: "target"

data ResponseDistro
= ResponseDistro
{ distroName :: Text,
distroVersion :: Text
}

instance FromJSON ResponseDistro where
parseJSON = withObject "ResponseDistro" $ \obj ->
ResponseDistro <$> obj .: "name"
<*> obj .: "version"

data SourceTarget
= SourceTarget
{ targetDigest :: Text,
targetTags :: [Text]
}

instance FromJSON SourceTarget where
parseJSON = withObject "SourceTarget" $ \obj ->
SourceTarget <$> obj .: "digest"
<*> obj .: "tags"

-- | The reorganized output of syft into a slightly different format
data ContainerScan
= ContainerScan
{ imageData :: ContainerImage,
imageTag :: Text,
imageDigest :: Text
}

instance ToJSON ContainerScan where
toJSON scan = object ["image" .= imageData scan]

data ContainerImage
= ContainerImage
{ imageArtifacts :: [ContainerArtifact],
imageOs :: Text,
imageOsRelease :: Text
}

instance ToJSON ContainerImage where
toJSON ContainerImage {..} =
object
[ "os" .= imageOs,
"osRelease" .= imageOsRelease,
"artifacts" .= imageArtifacts
]

data ContainerArtifact
= ContainerArtifact
{ conArtifactName :: Text,
conArtifactVersion :: Text,
conArtifactType :: Text,
conArtifactPkgUrl :: Text,
conArtifactMetadataType :: Text,
conArtifactMetadata :: Map Text Value
}

instance ToJSON ContainerArtifact where
toJSON ContainerArtifact {..} =
object
[ "name" .= conArtifactName,
"fullVersion" .= conArtifactVersion,
"type" .= conArtifactType,
"purl" .= conArtifactPkgUrl,
"metadataType" .= conArtifactMetadataType,
"metadata" .= LMap.delete "files" conArtifactMetadata
]

extractRevision :: OverrideProject -> ContainerScan -> ProjectRevision
extractRevision OverrideProject {..} ContainerScan {..} = ProjectRevision name revision Nothing
where
name = fromMaybe imageTag overrideName
revision = fromMaybe imageDigest overrideRevision


toContainerScan :: Has Diagnostics sig m => SyftResponse -> m ContainerScan
toContainerScan SyftResponse {..} = do
let newArts = map convertArtifact responseArtifacts
image = ContainerImage newArts (distroName responseDistro) (distroVersion responseDistro)
target = sourceTarget responseSource
tag <- extractTag $ targetTags target
pure . ContainerScan image tag $ targetDigest target

convertArtifact :: ResponseArtifact -> ContainerArtifact
convertArtifact ResponseArtifact {..} =
ContainerArtifact
{ conArtifactName = artifactName,
conArtifactVersion = artifactVersion,
conArtifactType = artifactType,
conArtifactPkgUrl = artifactPkgUrl,
conArtifactMetadataType = artifactMetadataType,
conArtifactMetadata = artifactMetadata
}

extractTag :: Has Diagnostics sig m => [Text] -> m Text
extractTag tags = do
firstTag <- fromMaybeText "No image tags found" $ listToMaybe tags
tagTuple <- fromMaybeText "Image was not in the format name:tag" $ breakOnAndRemove ":" firstTag
pure $ fst tagTuple

runSyft ::
( Has Diagnostics sig m,
Has (Lift IO) sig m,
MonadIO m
) =>
ImageText ->
m SyftResponse
runSyft image = runExecIO . withSyftBinary $ \syftBin -> do
execJson @SyftResponse [reldir|.|] $ syftCommand syftBin image

syftCommand :: BinaryPaths -> ImageText -> Command
syftCommand bin (ImageText image) =
Command
{ cmdName = pack . toFilePath $ toExecutablePath bin,
cmdArgs = ["-o", "json", image],
cmdAllowErr = Never
}
50 changes: 50 additions & 0 deletions src/App/Fossa/Container/Analyze.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@

module App.Fossa.Container.Analyze
( analyzeMain,
)
where

import App.Fossa.Analyze (ScanDestination (..), fossaProjectUrl)
import App.Fossa.Container (ImageText (..), runSyft, toContainerScan, extractRevision)
import App.Fossa.FossaAPIV1 (uploadContainerScan)
import App.Types (OverrideProject (..), ProjectRevision (..))
import Control.Carrier.Diagnostics
import Control.Effect.Lift (Lift)
import Control.Monad.IO.Class (MonadIO)
import Data.Aeson
import Data.Text.Lazy.Encoding (decodeUtf8)
import Effect.Logger
import Fossa.API.Types (ApiOpts (..))

analyzeMain :: ScanDestination -> Severity -> OverrideProject -> ImageText -> IO ()
analyzeMain scanDestination logSeverity override image = withLogger logSeverity $ do
result <- runDiagnostics $ analyze scanDestination override image
case result of
Left err -> logError (renderFailureBundle err)
Right _ -> pure ()

analyze ::
( Has Diagnostics sig m,
Has (Lift IO) sig m,
Has Logger sig m,
MonadIO m
) =>
ScanDestination ->
OverrideProject ->
ImageText ->
m ()
analyze scanDestination override image = do
logDebug "Running embedded syft binary"
containerScan <- runSyft image >>= toContainerScan
case scanDestination of
OutputStdout -> logStdout . pretty . decodeUtf8 $ encode containerScan
UploadScan apiOpts projectMeta -> do
let revision = extractRevision override containerScan
logInfo ("Using project name: `" <> pretty (projectName revision) <> "`")
logInfo ("Using project revision: `" <> pretty (projectRevision revision) <> "`")
locator <- uploadContainerScan apiOpts projectMeta containerScan
logInfo "Container Analysis successfully uploaded!"
logInfo "View FOSSA Report:"
logInfo (" " <> pretty (fossaProjectUrl (apiOptsUri apiOpts) locator revision))
pure ()

Loading