Skip to content
This repository has been archived by the owner on Apr 1, 2022. It is now read-only.

Detect binaries as dependencies #353

Merged
merged 33 commits into from
Sep 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
0a99c73
Add the concept of feature flags
jssblck Aug 26, 2021
f3eccff
Check if feature flags are enabled
jssblck Aug 26, 2021
e51a244
Gate monorepo scans on feature flag
jssblck Aug 26, 2021
30685c1
Gate --enable-vsi on feature flag
jssblck Aug 26, 2021
5805ba7
Add intent
jssblck Aug 30, 2021
8547fd3
Merge branch 'master' into feature-flags
jssblck Aug 31, 2021
83bb27f
Fix feature flag endpoint
jssblck Aug 31, 2021
014557f
Add fileIsBinary to ReadFS
jssblck Aug 30, 2021
9cd9034
Support basic binary detection strategy
jssblck Aug 30, 2021
8f505b1
Add BinaryDeps function & fetcher type
jssblck Aug 30, 2021
34b8bd3
Render shorter fingerprint
jssblck Aug 31, 2021
80953e9
Only run binary detection on feature flag
jssblck Aug 31, 2021
20fdc4d
Don't default to disabled feature flags
jssblck Aug 31, 2021
20b917a
Add to changelog
jssblck Sep 1, 2021
92ac710
Merge branch 'master' into feature-flags
jssblck Sep 1, 2021
f81c417
Merge branch 'feature-flags' into binary-enforcement
jssblck Sep 1, 2021
1c15a05
Update changelog
jssblck Sep 1, 2021
78311ff
Convert to use user-defined dependencies
jssblck Sep 2, 2021
00488f0
Require local CLI flag, remove feature flag
jssblck Sep 2, 2021
1557623
Remove binary discovery feature flag
jssblck Sep 2, 2021
f169938
Revert "Fix feature flag endpoint"
jssblck Sep 3, 2021
152ebcb
Revert "Add intent"
jssblck Sep 3, 2021
e813410
Revert "Gate --enable-vsi on feature flag"
jssblck Sep 3, 2021
43d1e5c
Revert "Gate monorepo scans on feature flag"
jssblck Sep 3, 2021
f8c8367
Revert "Check if feature flags are enabled"
jssblck Sep 3, 2021
60a6ec6
Revert "Add the concept of feature flags"
jssblck Sep 3, 2021
5f4b288
Update changelog
jssblck Sep 3, 2021
9efc673
Collapse a growing list of modes into one type
jssblck Sep 3, 2021
0fcd973
Merge branch 'master' into binary-enforcement
jssblck Sep 3, 2021
7024be5
Nits
jssblck Sep 3, 2021
8287acc
Refactor `additionalSourceUnits`
jssblck Sep 3, 2021
12073f8
Extract `fileIsBinary` from `ReadFS`
jssblck Sep 3, 2021
6615864
Merge branch 'master' into binary-enforcement
jssblck Sep 7, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Spectrometer Changelog

## v2.15.13

Adds another closed beta feature around FOSSA C/C++ support.
For now this functionality is considered publicly undocumented, and is only used with support from FOSSA engineering.

- Adds support for reporting detected binaries as unlicensed dependencies ([#353](https://github.com/fossas/spectrometer/pull/353))

## v2.15.12

- Yarn: Analyzes yarn.lock without runtime error, when yarn.lock includes directory dependency. ([#361](https://github.com/fossas/spectrometer/pull/361))
Expand Down
1 change: 1 addition & 0 deletions spectrometer.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ library
App.Fossa.API.BuildLink
App.Fossa.API.BuildWait
App.Fossa.ArchiveUploader
App.Fossa.BinaryDeps
App.Fossa.Compatibility
App.Fossa.Configuration
App.Fossa.Container
Expand Down
65 changes: 45 additions & 20 deletions src/App/Fossa/Analyze.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,18 @@ module App.Fossa.Analyze (
JsonOutput (..),
VSIAnalysisMode (..),
IATAssertionMode (..),
discoverFuncs,
BinaryDiscoveryMode (..),
RecordMode (..),
ModeOptions (..),
discoverFuncs,
) where

import App.Docs (userGuideUrl)
import App.Fossa.API.BuildLink (getFossaBuildUrl)
import App.Fossa.Analyze.GraphMangler (graphingToGraph)
import App.Fossa.Analyze.Project (ProjectResult (..), mkResult)
import App.Fossa.Analyze.Record (AnalyzeEffects (..), AnalyzeJournal (..), loadReplayLog, saveReplayLog)
import App.Fossa.BinaryDeps (analyzeBinaryDeps)
import App.Fossa.FossaAPIV1 (UploadResponse (..), getProject, projectIsMonorepo, uploadAnalysis, uploadContributors)
import App.Fossa.ManualDeps (analyzeFossaDepsFile)
import App.Fossa.ProjectInference (inferProjectDefault, inferProjectFromVCS, mergeOverride, saveRevision)
Expand Down Expand Up @@ -64,7 +67,7 @@ import Path (Abs, Dir, Path, fromAbsDir, toFilePath)
import Path.IO (makeRelative)
import Path.IO qualified as P
import Srclib.Converter qualified as Srclib
import Srclib.Types (Locator (locatorProject, locatorRevision), SourceUnit, parseLocator)
import Srclib.Types (Locator (locatorProject, locatorRevision), SourceUnit (..), parseLocator)
import Strategy.Bundler qualified as Bundler
import Strategy.Cargo qualified as Cargo
import Strategy.Carthage qualified as Carthage
Expand Down Expand Up @@ -119,6 +122,14 @@ data UnpackArchives = UnpackArchives

data JsonOutput = JsonOutput

-- | Collect analysis modes into a single type for ease of use.
-- These modes are intended to be different options that alter how analysis is performed or what analysis steps are followed.
data ModeOptions = ModeOptions
{ modeVSIAnalysis :: VSIAnalysisMode
, modeIATAssertion :: IATAssertionMode
, modeBinaryDiscovery :: BinaryDiscoveryMode
}
Comment on lines +127 to +131
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[no action] 💡

the analyzeMain args sure are getting unwieldy.. I wonder if we should pack all of the args into a record


-- | "VSI analysis" modes
data VSIAnalysisMode
= -- | enable the VSI analysis strategy
Expand All @@ -133,6 +144,13 @@ data IATAssertionMode
| -- | assertion not enabled
IATAssertionDisabled

-- | "Binary Discovery" modes
data BinaryDiscoveryMode
= -- | Binary discovery enabled
BinaryDiscoveryEnabled
| -- | Binary discovery disabled
BinaryDiscoveryDisabled

-- | "Replay logging" modes
data RecordMode
= -- | record effect invocations
Expand All @@ -142,8 +160,8 @@ data RecordMode
| -- | don't record or replay
RecordModeNone

analyzeMain :: FilePath -> RecordMode -> Severity -> ScanDestination -> OverrideProject -> Flag UnpackArchives -> Flag JsonOutput -> VSIAnalysisMode -> IATAssertionMode -> AllFilters -> IO ()
analyzeMain workdir recordMode logSeverity destination project unpackArchives jsonOutput enableVSI assertionMode filters =
analyzeMain :: FilePath -> RecordMode -> Severity -> ScanDestination -> OverrideProject -> Flag UnpackArchives -> Flag JsonOutput -> ModeOptions -> AllFilters -> IO ()
analyzeMain workdir recordMode logSeverity destination project unpackArchives jsonOutput modeOptions filters =
withDefaultLogger logSeverity
. Diag.logWithExit_
. runReadFSIO
Expand All @@ -170,7 +188,7 @@ analyzeMain workdir recordMode logSeverity destination project unpackArchives js
. runReplay @Exec (effectsExec effects)
$ doAnalyze basedir
where
doAnalyze basedir = analyze basedir destination project unpackArchives jsonOutput enableVSI assertionMode filters
doAnalyze basedir = analyze basedir destination project unpackArchives jsonOutput modeOptions filters

discoverFuncs :: (TaskEffs sig m, TaskEffs rsig run) => [Path Abs Dir -> m [DiscoveredProject run]]
discoverFuncs =
Expand Down Expand Up @@ -245,19 +263,23 @@ analyze ::
OverrideProject ->
Flag UnpackArchives ->
Flag JsonOutput ->
VSIAnalysisMode ->
IATAssertionMode ->
ModeOptions ->
AllFilters ->
m ()
analyze (BaseDir basedir) destination override unpackArchives jsonOutput enableVSI iatAssertion filters = do
analyze (BaseDir basedir) destination override unpackArchives jsonOutput ModeOptions{..} filters = do
capabilities <- sendIO getNumCapabilities

let apiOpts = case destination of
OutputStdout -> Nothing
UploadScan opts _ -> Just opts

-- additional source units are built outside the standard strategy flow, because they either
-- require additional information (eg API credentials), or they return additional information (eg user deps).
manualSrcUnits <- analyzeFossaDepsFile basedir apiOpts
vsiResults <- analyzeVSI enableVSI apiOpts basedir filters
vsiResults <- analyzeVSI modeVSIAnalysis apiOpts basedir filters
binarySearchResults <- analyzeDiscoverBinaries modeBinaryDiscovery basedir filters
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[no action] Might be useful to pair at some point on how to make these tasks run in parallel / show status updates. It's not as easy as it should be

both of these are long-running and might benefit from the parallelism / context status updates

let additionalSourceUnits :: [SourceUnit]
additionalSourceUnits = catMaybes [manualSrcUnits, vsiResults, binarySearchResults]

(projectResults, ()) <-
runOutput @ProjectResult
Expand All @@ -270,14 +292,14 @@ analyze (BaseDir basedir) destination override unpackArchives jsonOutput enableV
let filteredProjects = filterProjects (BaseDir basedir) projectResults

-- Need to check if vendored is empty as well, even if its a boolean that vendoredDeps exist
case checkForEmptyUpload projectResults filteredProjects [manualSrcUnits, vsiResults] of
case checkForEmptyUpload projectResults filteredProjects additionalSourceUnits of
NoneDiscovered -> Diag.fatal ErrNoProjectsDiscovered
FilteredAll count -> Diag.fatal (ErrFilteredAllProjects count projectResults)
FoundSome sourceUnits -> case destination of
OutputStdout -> logStdout . decodeUtf8 . Aeson.encode $ buildResult manualSrcUnits filteredProjects
OutputStdout -> logStdout . decodeUtf8 . Aeson.encode $ buildResult additionalSourceUnits filteredProjects
UploadScan opts metadata -> do
locator <- uploadSuccessfulAnalysis (BaseDir basedir) opts metadata jsonOutput override sourceUnits
doAssertRevisionBinaries iatAssertion opts locator
doAssertRevisionBinaries modeIATAssertion opts locator

analyzeVSI :: (MonadIO m, Has Diag.Diagnostics sig m, Has Exec sig m, Has (Lift IO) sig m, Has Logger sig m) => VSIAnalysisMode -> Maybe ApiOpts -> Path Abs Dir -> AllFilters -> m (Maybe SourceUnit)
analyzeVSI VSIAnalysisEnabled (Just apiOpts) dir filters = do
Expand All @@ -286,6 +308,12 @@ analyzeVSI VSIAnalysisEnabled (Just apiOpts) dir filters = do
pure $ Just results
analyzeVSI _ _ _ _ = pure Nothing

analyzeDiscoverBinaries :: (MonadIO m, Has Diag.Diagnostics sig m, Has (Lift IO) sig m, Has Logger sig m, Has ReadFS sig m) => BinaryDiscoveryMode -> Path Abs Dir -> AllFilters -> m (Maybe SourceUnit)
analyzeDiscoverBinaries BinaryDiscoveryEnabled dir filters = do
logInfo "Discovering binary files as dependencies"
analyzeBinaryDeps dir filters
analyzeDiscoverBinaries _ _ _ = pure Nothing

doAssertRevisionBinaries :: (Has Diag.Diagnostics sig m, Has ReadFS sig m, Has (Lift IO) sig m, Has Logger sig m) => IATAssertionMode -> ApiOpts -> Locator -> m ()
doAssertRevisionBinaries (IATAssertionEnabled dir) apiOpts locator = assertRevisionBinaries dir apiOpts locator
doAssertRevisionBinaries _ _ _ = pure ()
Expand Down Expand Up @@ -380,9 +408,8 @@ data CountedResult
-- Takes a list of all projects analyzed, and the list after filtering. We assume
-- that the smaller list is the latter, and return that list. Starting with user-defined deps,
-- we also include a check for an additional source unit from fossa-deps.yml.
checkForEmptyUpload :: [ProjectResult] -> [ProjectResult] -> [Maybe SourceUnit] -> CountedResult
checkForEmptyUpload xs ys potentialAdditionalUnits = do
let additionalUnits = catMaybes potentialAdditionalUnits
checkForEmptyUpload :: [ProjectResult] -> [ProjectResult] -> [SourceUnit] -> CountedResult
checkForEmptyUpload xs ys additionalUnits = do
if null additionalUnits
then case (xlen, ylen) of
-- We didn't discover, so we also didn't filter
Expand Down Expand Up @@ -463,16 +490,14 @@ buildProjectSummary project projectLocator projectUrl = do
, "id" .= projectLocator
]

buildResult :: Maybe SourceUnit -> [ProjectResult] -> Aeson.Value
buildResult maybeSrcUnit projects =
buildResult :: [SourceUnit] -> [ProjectResult] -> Aeson.Value
buildResult srcUnits projects =
Aeson.object
[ "projects" .= map buildProject projects
, "sourceUnits" .= finalSourceUnits
]
where
finalSourceUnits = case maybeSrcUnit of
Just unit -> unit : scannedUnits
Nothing -> scannedUnits
finalSourceUnits = srcUnits ++ scannedUnits
scannedUnits = map Srclib.toSourceUnit projects

buildProject :: ProjectResult -> Aeson.Value
Expand Down
115 changes: 115 additions & 0 deletions src/App/Fossa/BinaryDeps.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
{-# LANGUAGE RecordWildCards #-}

module App.Fossa.BinaryDeps (analyzeBinaryDeps) where

import App.Fossa.Analyze.Project (ProjectResult (..))
import App.Fossa.VSI.IAT.Fingerprint (fingerprintRaw)
import App.Fossa.VSI.IAT.Types (Fingerprint (..))
import Control.Algebra (Has)
import Control.Carrier.Diagnostics (Diagnostics, fromEither)
import Control.Effect.Lift (Lift)
import Data.ByteString qualified as BS
import Data.Maybe (catMaybes)
import Data.String.Conversion (toText)
import Data.Text (Text)
import Data.Text qualified as Text
import Discovery.Filters (AllFilters (..), FilterCombination (combinedPaths))
import Discovery.Walk (WalkStep (WalkContinue), walk')
import Effect.ReadFS (ReadFS, readContentsBSLimit)
import Path (Abs, Dir, File, Path, isProperPrefixOf, stripProperPrefix, toFilePath, (</>))
import Srclib.Converter qualified as Srclib
import Srclib.Types (AdditionalDepData (..), SourceUnit (..), SourceUserDefDep (..))
import Types (GraphBreadth (Complete))

data BinaryFile = BinaryFile
{ binaryPath :: Path Abs File
, binaryFingerprint :: Fingerprint
}

-- | Binary detection is sufficiently different from other analysis types that it cannot be just another strategy.
-- Instead, binary detection is run separately over the entire scan directory, outputting its own source unit.
-- The goal of this feature is to enable a FOSSA user to flag all vendored binaries (as defined by git) in the project as dependencies.
-- Users may then use standard FOSSA UX flows to ignore or add license information to the detected binaries.
analyzeBinaryDeps :: (Has (Lift IO) sig m, Has Diagnostics sig m, Has ReadFS sig m) => Path Abs Dir -> AllFilters -> m (Maybe SourceUnit)
analyzeBinaryDeps dir filters = do
binaries <- fingerprintBinaries (toPathFilters dir filters) dir
if null binaries
then pure Nothing
else pure . Just $ toSourceUnit (toProject dir) binaries

fingerprintBinaries :: (Has (Lift IO) sig m, Has Diagnostics sig m, Has ReadFS sig m) => PathFilters -> Path Abs Dir -> m [BinaryFile]
fingerprintBinaries filters = walk' $ \dir _ files -> do
if shouldFingerprintDir dir filters
then do
someBinaries <- traverse fingerprintIfBinary files
pure (catMaybes someBinaries, WalkContinue)
else pure ([], WalkContinue)

fingerprintIfBinary :: (Has (Lift IO) sig m, Has Diagnostics sig m, Has ReadFS sig m) => Path Abs File -> m (Maybe BinaryFile)
fingerprintIfBinary file = do
isBinary <- fileIsBinary file
if isBinary
then do
fp <- fingerprintRaw file
pure . Just $ BinaryFile file fp
else pure Nothing

-- | PathFilters is a specialized filter mechanism that operates only on absolute directory paths.
data PathFilters = PathFilters
{ include :: [Path Abs Dir]
, exclude :: [Path Abs Dir]
}
deriving (Show)

toPathFilters :: Path Abs Dir -> AllFilters -> PathFilters
toPathFilters root filters =
PathFilters
{ include = map (root </>) (combinedPaths $ includeFilters filters)
, exclude = map (root </>) (combinedPaths $ excludeFilters filters)
}

shouldFingerprintDir :: Path Abs Dir -> PathFilters -> Bool
shouldFingerprintDir dir filters = (not shouldExclude) && shouldInclude
where
shouldExclude = (isPrefixedOrEqual dir) `any` (exclude filters)
shouldInclude = null (include filters) || (isPrefixedOrEqual dir) `any` (include filters)
isPrefixedOrEqual a b = a == b || isProperPrefixOf b a -- swap order of isProperPrefixOf comparison because we want to know if dir is prefixed by any filter

toProject :: Path Abs Dir -> ProjectResult
toProject dir = ProjectResult "binary-deps" dir mempty Complete []

toDependency :: Path Abs Dir -> BinaryFile -> SourceUserDefDep
toDependency root BinaryFile{..} =
SourceUserDefDep
{ srcUserDepName = renderRelative root binaryPath
, srcUserDepVersion = renderFingerprint binaryFingerprint
, srcUserDepLicense = ""
, srcUserDepDescription = Just "Binary discovered in source tree"
, srcUserDepHomepage = Nothing
}

toSourceUnit :: ProjectResult -> [BinaryFile] -> SourceUnit
toSourceUnit project binaries = do
let unit = Srclib.toSourceUnit project
let deps = map (toDependency $ projectResultPath project) binaries
unit{additionalData = Just $ AdditionalDepData (Just deps) Nothing}

-- | Just render the first few characters of the fingerprint.
-- The goal is to provide a high confidence that future binaries with the same name won't collide,
-- and we don't need all 256 bits for that.
renderFingerprint :: Fingerprint -> Text
renderFingerprint fingerprint = Text.take 12 $ unFingerprint fingerprint

renderRelative :: Path Abs Dir -> Path Abs File -> Text
renderRelative absDir absFile =
case stripProperPrefix absDir absFile of
Left _ -> toText . toFilePath $ absFile
Right relFile -> toText . toFilePath $ relFile

-- | Determine if a file is binary using the same method as git:
-- "is there a zero byte in the first 8000 bytes of the file"
fileIsBinary :: (Has ReadFS sig m, Has Diagnostics sig m) => Path Abs File -> m Bool
fileIsBinary file = do
attemptedContent <- readContentsBSLimit file 8000
content <- fromEither attemptedContent
pure $ BS.elem 0 content
12 changes: 10 additions & 2 deletions src/App/Fossa/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ module App.Fossa.Main (
appMain,
) where

import App.Fossa.Analyze (IATAssertionMode (..), JsonOutput (..), RecordMode (..), ScanDestination (..), UnpackArchives (..), VSIAnalysisMode (..), analyzeMain)
import App.Fossa.Analyze (BinaryDiscoveryMode (..), IATAssertionMode (..), JsonOutput (..), ModeOptions (ModeOptions), RecordMode (..), ScanDestination (..), UnpackArchives (..), VSIAnalysisMode (..), analyzeMain)
import App.Fossa.Compatibility (Argument, argumentParser, compatibilityMain)
import App.Fossa.Configuration (
ConfigFile (
Expand Down Expand Up @@ -191,7 +191,8 @@ appMain = do

let analyzeOverride = override{overrideBranch = analyzeBranch <|> ((fileConfig >>= configRevision) >>= configBranch)}
combinedFilters = normalizedFilters fileConfig analyzeOptions
doAnalyze destination = analyzeMain analyzeBaseDir analyzeRecordMode logSeverity destination analyzeOverride analyzeUnpackArchives analyzeJsonOutput analyzeVSIMode assertionMode combinedFilters
modeOptions = ModeOptions analyzeVSIMode assertionMode analyzeBinaryDiscoveryMode
doAnalyze destination = analyzeMain analyzeBaseDir analyzeRecordMode logSeverity destination analyzeOverride analyzeUnpackArchives analyzeJsonOutput modeOptions combinedFilters

if analyzeOutput
then doAnalyze OutputStdout
Expand Down Expand Up @@ -414,6 +415,7 @@ analyzeOpts =
<*> many (option (eitherReader pathOpt) (long "only-path" <> help "Only scan these paths. See paths.only in the fossa.yml spec." <> metavar "PATH"))
<*> many (option (eitherReader pathOpt) (long "exclude-path" <> help "Exclude these paths from scanning. See paths.exclude in the fossa.yml spec." <> metavar "PATH"))
<*> vsiAnalyzeOpt
<*> binaryDiscoveryOpt
<*> iatAssertionOpt
<*> monorepoOpts
<*> analyzeReplayOpt
Expand All @@ -424,6 +426,11 @@ vsiAnalyzeOpt =
flag' VSIAnalysisEnabled (long "enable-vsi" <> hidden)
<|> pure VSIAnalysisDisabled

binaryDiscoveryOpt :: Parser BinaryDiscoveryMode
binaryDiscoveryOpt =
flag' BinaryDiscoveryEnabled (long "experimental-enable-binary-discovery" <> hidden)
<|> pure BinaryDiscoveryDisabled

iatAssertionOpt :: Parser AnalyzeVSIAssertionMode
iatAssertionOpt =
(AnalyzeVSIAssertionEnabled <$> strOption (long "experimental-link-project-binary" <> hidden))
Expand Down Expand Up @@ -703,6 +710,7 @@ data AnalyzeOptions = AnalyzeOptions
, analyzeOnlyPaths :: [Path Rel Dir]
, analyzeExcludePaths :: [Path Rel Dir]
, analyzeVSIMode :: VSIAnalysisMode
, analyzeBinaryDiscoveryMode :: BinaryDiscoveryMode
, analyzeAssertMode :: AnalyzeVSIAssertionMode
, monorepoAnalysisOpts :: MonorepoAnalysisOpts
, analyzeRecordMode :: RecordMode
Expand Down
1 change: 1 addition & 0 deletions src/App/Fossa/VSI/IAT/Fingerprint.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
module App.Fossa.VSI.IAT.Fingerprint (
fingerprintRaw,
fingerprintContentsRaw,
) where

Expand Down
7 changes: 1 addition & 6 deletions src/App/Fossa/VSI/IAT/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,7 @@ module App.Fossa.VSI.IAT.Types (
) where

import App.Fossa.VSI.Types qualified as VSI
import Data.Aeson (
FromJSON (parseJSON),
ToJSON (toJSON),
withObject,
(.:),
)
import Data.Aeson (FromJSON (parseJSON), ToJSON (toJSON), withObject, (.:))
import Data.Text (Text)

-- | Fingerprint uniquely idenfies a file, derived from its content.
Expand Down
Loading