datadog-agent icon indicating copy to clipboard operation
datadog-agent copied to clipboard

Add agentless-scanner software

Open 0intro opened this issue 1 year ago • 8 comments

What does this PR do?

This change adds the agentless-scanner software.

The agentless-scanner fetches software package data from cloud resources, which is forwarded to Datadog for vulnerability scans.

The cmd/agentless-scanner code is based on https://github.com/DataDog/datadog-agent/commit/ad0db0795aeecabdd208916f2fbc879f3b5696d5 (based on the 7.51 branch), with the following changes, so it builds on the main branch:

--- a/cmd/agentless-scanner/awsmain.go
+++ b/cmd/agentless-scanner/awsmain.go
@@ -285,6 +285,7 @@
 		DefaultActions: actions,
 		DefaultRoles:   roles,
 		Statsd:         statsd,
+		EventForwarder: eventForwarder,
 	})
 	if err != nil {
 		return fmt.Errorf("could not initialize agentless-scanner: %w", err)
@@ -320,6 +321,7 @@
 		DefaultActions: actions,
 		DefaultRoles:   roles,
 		Statsd:         statsd,
+		EventForwarder: eventForwarder,
 	})
 	if err != nil {
 		return fmt.Errorf("could not initialize agentless-scanner: %w", err)
--- a/cmd/agentless-scanner/main.go
+++ b/cmd/agentless-scanner/main.go
@@ -30,6 +30,9 @@
 	complog "github.com/DataDog/datadog-agent/comp/core/log"
 	"github.com/DataDog/datadog-agent/comp/core/log/logimpl"
 	"github.com/DataDog/datadog-agent/comp/core/secrets"
+	"github.com/DataDog/datadog-agent/comp/forwarder/eventplatform"
+	"github.com/DataDog/datadog-agent/comp/forwarder/eventplatform/eventplatformimpl"
+	"github.com/DataDog/datadog-agent/comp/forwarder/eventplatformreceiver/eventplatformreceiverimpl"
 	pkgconfig "github.com/DataDog/datadog-agent/pkg/config"
 	pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup"
 	"github.com/DataDog/datadog-agent/pkg/pidfile"
@@ -53,6 +56,7 @@
 )
 
 var statsd *ddogstatsd.Client
+var eventForwarder eventplatform.Component
 
 var globalFlags struct {
 	configFilePath string
@@ -64,7 +68,8 @@
 func runWithModules(run func(cmd *cobra.Command, args []string) error) func(cmd *cobra.Command, args []string) error {
 	return func(cmd *cobra.Command, args []string) error {
 		return fxutil.OneShot(
-			func(_ complog.Component, _ compconfig.Component) error {
+			func(_ complog.Component, _ compconfig.Component, evp eventplatform.Component) error {
+				eventForwarder = evp
 				return run(cmd, args)
 			},
 			fx.Supply(core.BundleParams{
@@ -73,6 +78,9 @@
 				LogParams:    logimpl.ForDaemon(runner.LoggerName, "log_file", pkgconfigsetup.DefaultAgentlessScannerLogFile),
 			}),
 			core.Bundle(),
+			eventplatformimpl.Module(),
+			fx.Supply(eventplatformimpl.NewDefaultParams()),
+			eventplatformreceiverimpl.Module(),
 		)
 	}
 }
@@ -210,6 +219,7 @@
 		DefaultActions: defaultActions,
 		DefaultRoles:   getDefaultRolesMapping(provider),
 		Statsd:         statsd,
+		EventForwarder: eventForwarder,
 	})
 	if err != nil {
 		return fmt.Errorf("could not initialize agentless-scanner: %w", err)
--- a/cmd/agentless-scanner/runner/findings_reporter.go
+++ b/cmd/agentless-scanner/runner/findings_reporter.go
@@ -10,6 +10,8 @@
 	"fmt"
 	"time"
 
+	"github.com/DataDog/datadog-agent/comp/core/hostname/hostnameimpl"
+	"github.com/DataDog/datadog-agent/comp/logs/agent"
 	"github.com/DataDog/datadog-agent/comp/logs/agent/config"
 	coreconfig "github.com/DataDog/datadog-agent/pkg/config"
 	"github.com/DataDog/datadog-agent/pkg/logs/auditor"
@@ -47,7 +49,7 @@
 	if err != nil {
 		endpoints, err = config.BuildHTTPEndpoints(coreconfig.Datadog, intakeTrackType, config.AgentJSONIntakeProtocol, config.DefaultIntakeOrigin)
 		if err == nil {
-			httpConnectivity := logshttp.CheckConnectivity(endpoints.Main)
+			httpConnectivity := logshttp.CheckConnectivity(endpoints.Main, coreconfig.Datadog)
 			endpoints, err = config.BuildEndpoints(coreconfig.Datadog, httpConnectivity, intakeTrackType, config.AgentJSONIntakeProtocol, config.DefaultIntakeOrigin)
 		}
 	}
@@ -63,7 +65,7 @@
 	auditor.Start()
 
 	// setup the pipeline provider that provides pairs of processor and sender
-	pipelineProvider := pipeline.NewProvider(config.NumberOfPipelines, auditor, &diagnostic.NoopMessageReceiver{}, nil, endpoints, dstcontext)
+	pipelineProvider := pipeline.NewProvider(config.NumberOfPipelines, auditor, &diagnostic.NoopMessageReceiver{}, nil, endpoints, dstcontext, agent.NewStatusProvider(), hostnameimpl.NewHostnameService(), coreconfig.Datadog)
 	pipelineProvider.Start()
 
 	logSource := sources.NewLogSource(
--- a/cmd/agentless-scanner/runner/runner.go
+++ b/cmd/agentless-scanner/runner/runner.go
@@ -38,10 +38,10 @@
 	"github.com/DataDog/datadog-agent/cmd/agentless-scanner/scanners"
 	"github.com/DataDog/datadog-agent/cmd/agentless-scanner/types"
 
+	"github.com/DataDog/datadog-agent/comp/forwarder/eventplatform"
 	"github.com/DataDog/datadog-agent/pkg/api/security"
 	"github.com/DataDog/datadog-agent/pkg/config"
 	"github.com/DataDog/datadog-agent/pkg/config/remote/client"
-	"github.com/DataDog/datadog-agent/pkg/epforwarder"
 	"github.com/DataDog/datadog-agent/pkg/logs/message"
 	"github.com/DataDog/datadog-agent/pkg/remoteconfig/state"
 	"github.com/DataDog/datadog-agent/pkg/util/log"
@@ -69,6 +69,7 @@
 	DefaultRoles   types.RolesMapping
 	DefaultActions []types.ScanAction
 	Statsd         *ddogstatsd.Client
+	EventForwarder eventplatform.Component
 }
 
 type scanRecord struct {
@@ -80,7 +81,6 @@
 type Runner struct {
 	Options
 
-	eventForwarder   epforwarder.EventPlatformForwarder
 	findingsReporter *LogReporter
 	rcClient         *client.Client
 
@@ -106,13 +106,15 @@
 	if opts.Statsd == nil {
 		panic("programmer error: missing Statsd option")
 	}
+	if opts.EventForwarder == nil {
+		panic("programmer error: missing EventForwarder option")
+	}
 	if opts.Workers == 0 {
 		panic("programmer error: Workers is 0")
 	}
 	if opts.ScannersMax == 0 {
 		panic("programmer error: ScannersMax is 0")
 	}
-	eventForwarder := epforwarder.NewEventPlatformForwarder()
 	findingsReporter, err := newFindingsReporter()
 	if err != nil {
 		return nil, err
@@ -122,7 +124,8 @@
 		return nil, err
 	}
 
-	rcClient, err := client.NewUnverifiedGRPCClient(ipcAddress, config.GetIPCPort(), security.FetchAuthToken,
+	rcClient, err := client.NewUnverifiedGRPCClient(ipcAddress, config.GetIPCPort(),
+		func() (string, error) { return security.FetchAuthToken(config.Datadog) },
 		client.WithAgent("sidescanner", version.AgentVersion),
 		client.WithPollInterval(5*time.Second),
 	)
@@ -132,7 +135,6 @@
 	return &Runner{
 		Options: opts,
 
-		eventForwarder:   eventForwarder,
 		findingsReporter: findingsReporter,
 		rcClient:         rcClient,
 
@@ -345,8 +347,13 @@
 	log.Infof("starting agentless-scanner main loop with %d scan workers", s.Workers)
 	defer log.Infof("stopped agentless-scanner main loop")
 
-	s.eventForwarder.Start()
-	defer s.eventForwarder.Stop()
+	eventPlatform, found := s.EventForwarder.Get()
+	if found {
+		eventPlatform.Start()
+		defer eventPlatform.Stop()
+	} else {
+		log.Info("not starting the event platform forwarder")
+	}
 
 	s.rcClient.Start()
 
@@ -728,8 +735,13 @@
 		return fmt.Errorf("unable to proto marhsal sbom: %w", err)
 	}
 
+	eventPlatform, found := s.EventForwarder.Get()
+	if !found {
+		return errors.New("event platform forwarder not initialized")
+	}
+
 	m := message.NewMessage(rawEvent, nil, "", 0)
-	return s.eventForwarder.SendEventPlatformEvent(m, epforwarder.EventTypeContainerSBOM)
+	return eventPlatform.SendEventPlatformEvent(m, eventplatform.EventTypeContainerSBOM)
 }
 
 func (s *Runner) sendFindings(findings []*types.ScanFinding) {

Motivation

Additional Notes

Possible Drawbacks / Trade-offs

Describe how to test/QA your changes

0intro avatar Mar 06 '24 13:03 0intro

Bloop Bleep... Dogbot Here

Regression Detector Results

Run ID: 3c1210c8-6fdc-4859-a10a-9cf9db0ff617 Baseline: 9923959a6525bc1dd4576ec3a682daec6e717f5a Comparison: 6d178890329e81a400b51d709e607d2c8220b83d

Performance changes are noted in the perf column of each table:

  • ✅ = significantly better comparison variant performance
  • ❌ = significantly worse comparison variant performance
  • ➖ = no significant change in performance

No significant changes in experiment optimization goals

Confidence level: 90.00% Effect size tolerance: |Δ mean %| ≥ 5.00%

There were no significant changes in experiment optimization goals at this confidence level and effect size tolerance.

Experiments ignored for regressions

Regressions in experiments with settings containing erratic: true are ignored.

perf experiment goal Δ mean % Δ mean % CI
file_to_blackhole % cpu utilization +0.64 [-5.96, +7.25]

Fine details of change detection per experiment

perf experiment goal Δ mean % Δ mean % CI
basic_py_check % cpu utilization +1.36 [-0.90, +3.63]
file_to_blackhole % cpu utilization +0.64 [-5.96, +7.25]
tcp_syslog_to_blackhole ingress throughput +0.05 [-0.01, +0.10]
trace_agent_json ingress throughput +0.01 [-0.02, +0.03]
trace_agent_msgpack ingress throughput +0.01 [-0.01, +0.02]
tcp_dd_logs_filter_exclude ingress throughput +0.00 [-0.00, +0.00]
uds_dogstatsd_to_api ingress throughput -0.00 [-0.00, +0.00]
otel_to_otel_logs ingress throughput -0.09 [-0.72, +0.54]
process_agent_standard_check_with_stats memory utilization -0.13 [-0.16, -0.09]
file_tree memory utilization -0.18 [-0.25, -0.10]
idle memory utilization -0.25 [-0.29, -0.22]
process_agent_standard_check memory utilization -0.65 [-0.68, -0.61]
process_agent_real_time_mode memory utilization -0.66 [-0.70, -0.63]
uds_dogstatsd_to_api_cpu % cpu utilization -0.79 [-2.21, +0.64]

Explanation

A regression test is an A/B test of target performance in a repeatable rig, where "performance" is measured as "comparison variant minus baseline variant" for an optimization goal (e.g., ingress throughput). Due to intrinsic variability in measuring that goal, we can only estimate its mean value for each experiment; we report uncertainty in that value as a 90.00% confidence interval denoted "Δ mean % CI".

For each experiment, we decide whether a change in performance is a "regression" -- a change worth investigating further -- if all of the following criteria are true:

  1. Its estimated |Δ mean %| ≥ 5.00%, indicating the change is big enough to merit a closer look.

  2. Its 90.00% confidence interval "Δ mean % CI" does not contain zero, indicating that if our statistical model is accurate, there is at least a 90.00% chance there is a difference in performance between baseline and comparison variants.

  3. Its configuration does not mark it "erratic".

pr-commenter[bot] avatar Mar 06 '24 14:03 pr-commenter[bot]

Hi @DataDog/documentation. I've updated the release notes to add a description of the agentless-scanner. Could you please take another look and let me know if it looks good to you? Thanks.

0intro avatar Mar 08 '24 11:03 0intro

Also, side question: this looks like a code import from another repo (8k new lines in a single commit).

If it's the case, could we keep the code history from the original repo?

This is an import from the following branch: jinroh/side-scanner (based on 7.51.x branch).

I'd love to import the commit history into this PR (or rather a cleaned-up history), however it will be unfortunately squashed into a single commit by the now mandatory /merge command.

Of course, we plan to keep the development history in a branch called ducolombier/agentless-scanner-7.51 or something like that.

0intro avatar Mar 11 '24 15:03 0intro

Also, side question: this looks like a code import from another repo (8k new lines in a single commit). If it's the case, could we keep the code history from the original repo?

This is an import from the following branch: jinroh/side-scanner (based on 7.51.x branch).

I'd love to import the commit history into this PR (or rather a cleaned-up history), however it will be unfortunately squashed into a single commit by the now mandatory /merge command.

Of course, we plan to keep the development history in a branch called ducolombier/agentless-scanner-7.51 or something like that.

You can use /merge -c rebase to use the merge queue with a rebase strategy.

hush-hush avatar Mar 11 '24 16:03 hush-hush

I'd like to clarify that the agentless-scanner command is a bit different than the other existing commands, because it's autonomous and very independent from the rest of the agent:

  • It only depends on the config, log and remoteconfig components from the agent.
  • It's packaged independently from the other commands, in its own datadog-agentless-scanner RPM or DEB package.
  • Technically, it could live in its own repository, but we chose to integrate into the datadog-agent repository, so we could reuse the existing CI and release management process.

0intro avatar Mar 11 '24 16:03 0intro

I'm having trouble getting rid of pkg/config entirely because, for example, I couldn't find any alternative to the pkgconfig.GetBindHost function, and I couldn't find any example of replacing this function anywhere in the datadog-agent repository.

0intro avatar Mar 13 '24 13:03 0intro

Test changes on VM

Use this command from test-infra-definitions to manually test this PR changes on a VM:

inv create-vm --pipeline-id=30440723 --os-family=ubuntu

pr-commenter[bot] avatar Mar 15 '24 15:03 pr-commenter[bot]

Regression Detector

Regression Detector Results

Run ID: 8bce12b9-4416-4b33-afc7-fda0e283ce91 Baseline: 2a4205a56880483ea75abb8de5c8b639ca7ba991 Comparison: f6c68049a6cedcbdb086320fa147928634420263

Performance changes are noted in the perf column of each table:

  • ✅ = significantly better comparison variant performance
  • ❌ = significantly worse comparison variant performance
  • ➖ = no significant change in performance

Significant changes in experiment optimization goals

Confidence level: 90.00% Effect size tolerance: |Δ mean %| ≥ 5.00%

perf experiment goal Δ mean % Δ mean % CI
pycheck_1000_100byte_tags % cpu utilization +5.70 [+0.62, +10.77]

Experiments ignored for regressions

Regressions in experiments with settings containing erratic: true are ignored.

perf experiment goal Δ mean % Δ mean % CI
file_to_blackhole % cpu utilization -2.72 [-9.04, +3.60]

Fine details of change detection per experiment

perf experiment goal Δ mean % Δ mean % CI
pycheck_1000_100byte_tags % cpu utilization +5.70 [+0.62, +10.77]
tcp_syslog_to_blackhole ingress throughput +2.08 [+1.99, +2.17]
basic_py_check % cpu utilization +1.26 [-1.42, +3.95]
uds_dogstatsd_to_api_cpu % cpu utilization +1.05 [-1.78, +3.89]
process_agent_real_time_mode memory utilization +0.90 [+0.86, +0.94]
idle memory utilization +0.25 [+0.22, +0.29]
process_agent_standard_check_with_stats memory utilization +0.03 [-0.00, +0.06]
uds_dogstatsd_to_api ingress throughput +0.00 [-0.20, +0.20]
trace_agent_msgpack ingress throughput +0.00 [-0.00, +0.00]
trace_agent_json ingress throughput -0.01 [-0.04, +0.02]
tcp_dd_logs_filter_exclude ingress throughput -0.03 [-0.05, -0.00]
otel_to_otel_logs ingress throughput -0.05 [-0.48, +0.38]
file_tree memory utilization -0.15 [-0.24, -0.06]
process_agent_standard_check memory utilization -0.32 [-0.35, -0.28]
file_to_blackhole % cpu utilization -2.72 [-9.04, +3.60]

Explanation

A regression test is an A/B test of target performance in a repeatable rig, where "performance" is measured as "comparison variant minus baseline variant" for an optimization goal (e.g., ingress throughput). Due to intrinsic variability in measuring that goal, we can only estimate its mean value for each experiment; we report uncertainty in that value as a 90.00% confidence interval denoted "Δ mean % CI".

For each experiment, we decide whether a change in performance is a "regression" -- a change worth investigating further -- if all of the following criteria are true:

  1. Its estimated |Δ mean %| ≥ 5.00%, indicating the change is big enough to merit a closer look.

  2. Its 90.00% confidence interval "Δ mean % CI" does not contain zero, indicating that if our statistical model is accurate, there is at least a 90.00% chance there is a difference in performance between baseline and comparison variants.

  3. Its configuration does not mark it "erratic".

pr-commenter[bot] avatar Mar 15 '24 16:03 pr-commenter[bot]

Go Package Import Differences

This comment was omitted because it was over 65,536 characters. Please check the Gitlab Job logs to see its output.

cit-pr-commenter[bot] avatar Apr 03 '24 15:04 cit-pr-commenter[bot]