What does this PR do?

This change adds the agentless-scanner software.

The agentless-scanner fetches software package data from cloud resources, which is forwarded to Datadog for vulnerability scans.

The cmd/agentless-scanner code is based on https://github.com/DataDog/datadog-agent/commit/ad0db0795aeecabdd208916f2fbc879f3b5696d5 (based on the 7.51 branch), with the following changes, so it builds on the main branch:

--- a/cmd/agentless-scanner/awsmain.go
+++ b/cmd/agentless-scanner/awsmain.go
@@ -285,6 +285,7 @@
 		DefaultActions: actions,
 		DefaultRoles:   roles,
 		Statsd:         statsd,
+		EventForwarder: eventForwarder,
 	})
 	if err != nil {
 		return fmt.Errorf("could not initialize agentless-scanner: %w", err)
@@ -320,6 +321,7 @@
 		DefaultActions: actions,
 		DefaultRoles:   roles,
 		Statsd:         statsd,
+		EventForwarder: eventForwarder,
 	})
 	if err != nil {
 		return fmt.Errorf("could not initialize agentless-scanner: %w", err)
--- a/cmd/agentless-scanner/main.go
+++ b/cmd/agentless-scanner/main.go
@@ -30,6 +30,9 @@
 	complog "github.com/DataDog/datadog-agent/comp/core/log"
 	"github.com/DataDog/datadog-agent/comp/core/log/logimpl"
 	"github.com/DataDog/datadog-agent/comp/core/secrets"
+	"github.com/DataDog/datadog-agent/comp/forwarder/eventplatform"
+	"github.com/DataDog/datadog-agent/comp/forwarder/eventplatform/eventplatformimpl"
+	"github.com/DataDog/datadog-agent/comp/forwarder/eventplatformreceiver/eventplatformreceiverimpl"
 	pkgconfig "github.com/DataDog/datadog-agent/pkg/config"
 	pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup"
 	"github.com/DataDog/datadog-agent/pkg/pidfile"
@@ -53,6 +56,7 @@
 )
 
 var statsd *ddogstatsd.Client
+var eventForwarder eventplatform.Component
 
 var globalFlags struct {
 	configFilePath string
@@ -64,7 +68,8 @@
 func runWithModules(run func(cmd *cobra.Command, args []string) error) func(cmd *cobra.Command, args []string) error {
 	return func(cmd *cobra.Command, args []string) error {
 		return fxutil.OneShot(
-			func(_ complog.Component, _ compconfig.Component) error {
+			func(_ complog.Component, _ compconfig.Component, evp eventplatform.Component) error {
+				eventForwarder = evp
 				return run(cmd, args)
 			},
 			fx.Supply(core.BundleParams{
@@ -73,6 +78,9 @@
 				LogParams:    logimpl.ForDaemon(runner.LoggerName, "log_file", pkgconfigsetup.DefaultAgentlessScannerLogFile),
 			}),
 			core.Bundle(),
+			eventplatformimpl.Module(),
+			fx.Supply(eventplatformimpl.NewDefaultParams()),
+			eventplatformreceiverimpl.Module(),
 		)
 	}
 }
@@ -210,6 +219,7 @@
 		DefaultActions: defaultActions,
 		DefaultRoles:   getDefaultRolesMapping(provider),
 		Statsd:         statsd,
+		EventForwarder: eventForwarder,
 	})
 	if err != nil {
 		return fmt.Errorf("could not initialize agentless-scanner: %w", err)
--- a/cmd/agentless-scanner/runner/findings_reporter.go
+++ b/cmd/agentless-scanner/runner/findings_reporter.go
@@ -10,6 +10,8 @@
 	"fmt"
 	"time"
 
+	"github.com/DataDog/datadog-agent/comp/core/hostname/hostnameimpl"
+	"github.com/DataDog/datadog-agent/comp/logs/agent"
 	"github.com/DataDog/datadog-agent/comp/logs/agent/config"
 	coreconfig "github.com/DataDog/datadog-agent/pkg/config"
 	"github.com/DataDog/datadog-agent/pkg/logs/auditor"
@@ -47,7 +49,7 @@
 	if err != nil {
 		endpoints, err = config.BuildHTTPEndpoints(coreconfig.Datadog, intakeTrackType, config.AgentJSONIntakeProtocol, config.DefaultIntakeOrigin)
 		if err == nil {
-			httpConnectivity := logshttp.CheckConnectivity(endpoints.Main)
+			httpConnectivity := logshttp.CheckConnectivity(endpoints.Main, coreconfig.Datadog)
 			endpoints, err = config.BuildEndpoints(coreconfig.Datadog, httpConnectivity, intakeTrackType, config.AgentJSONIntakeProtocol, config.DefaultIntakeOrigin)
 		}
 	}
@@ -63,7 +65,7 @@
 	auditor.Start()
 
 	// setup the pipeline provider that provides pairs of processor and sender
-	pipelineProvider := pipeline.NewProvider(config.NumberOfPipelines, auditor, &diagnostic.NoopMessageReceiver{}, nil, endpoints, dstcontext)
+	pipelineProvider := pipeline.NewProvider(config.NumberOfPipelines, auditor, &diagnostic.NoopMessageReceiver{}, nil, endpoints, dstcontext, agent.NewStatusProvider(), hostnameimpl.NewHostnameService(), coreconfig.Datadog)
 	pipelineProvider.Start()
 
 	logSource := sources.NewLogSource(
--- a/cmd/agentless-scanner/runner/runner.go
+++ b/cmd/agentless-scanner/runner/runner.go
@@ -38,10 +38,10 @@
 	"github.com/DataDog/datadog-agent/cmd/agentless-scanner/scanners"
 	"github.com/DataDog/datadog-agent/cmd/agentless-scanner/types"
 
+	"github.com/DataDog/datadog-agent/comp/forwarder/eventplatform"
 	"github.com/DataDog/datadog-agent/pkg/api/security"
 	"github.com/DataDog/datadog-agent/pkg/config"
 	"github.com/DataDog/datadog-agent/pkg/config/remote/client"
-	"github.com/DataDog/datadog-agent/pkg/epforwarder"
 	"github.com/DataDog/datadog-agent/pkg/logs/message"
 	"github.com/DataDog/datadog-agent/pkg/remoteconfig/state"
 	"github.com/DataDog/datadog-agent/pkg/util/log"
@@ -69,6 +69,7 @@
 	DefaultRoles   types.RolesMapping
 	DefaultActions []types.ScanAction
 	Statsd         *ddogstatsd.Client
+	EventForwarder eventplatform.Component
 }
 
 type scanRecord struct {
@@ -80,7 +81,6 @@
 type Runner struct {
 	Options
 
-	eventForwarder   epforwarder.EventPlatformForwarder
 	findingsReporter *LogReporter
 	rcClient         *client.Client
 
@@ -106,13 +106,15 @@
 	if opts.Statsd == nil {
 		panic("programmer error: missing Statsd option")
 	}
+	if opts.EventForwarder == nil {
+		panic("programmer error: missing EventForwarder option")
+	}
 	if opts.Workers == 0 {
 		panic("programmer error: Workers is 0")
 	}
 	if opts.ScannersMax == 0 {
 		panic("programmer error: ScannersMax is 0")
 	}
-	eventForwarder := epforwarder.NewEventPlatformForwarder()
 	findingsReporter, err := newFindingsReporter()
 	if err != nil {
 		return nil, err
@@ -122,7 +124,8 @@
 		return nil, err
 	}
 
-	rcClient, err := client.NewUnverifiedGRPCClient(ipcAddress, config.GetIPCPort(), security.FetchAuthToken,
+	rcClient, err := client.NewUnverifiedGRPCClient(ipcAddress, config.GetIPCPort(),
+		func() (string, error) { return security.FetchAuthToken(config.Datadog) },
 		client.WithAgent("sidescanner", version.AgentVersion),
 		client.WithPollInterval(5*time.Second),
 	)
@@ -132,7 +135,6 @@
 	return &Runner{
 		Options: opts,
 
-		eventForwarder:   eventForwarder,
 		findingsReporter: findingsReporter,
 		rcClient:         rcClient,
 
@@ -345,8 +347,13 @@
 	log.Infof("starting agentless-scanner main loop with %d scan workers", s.Workers)
 	defer log.Infof("stopped agentless-scanner main loop")
 
-	s.eventForwarder.Start()
-	defer s.eventForwarder.Stop()
+	eventPlatform, found := s.EventForwarder.Get()
+	if found {
+		eventPlatform.Start()
+		defer eventPlatform.Stop()
+	} else {
+		log.Info("not starting the event platform forwarder")
+	}
 
 	s.rcClient.Start()
 
@@ -728,8 +735,13 @@
 		return fmt.Errorf("unable to proto marhsal sbom: %w", err)
 	}
 
+	eventPlatform, found := s.EventForwarder.Get()
+	if !found {
+		return errors.New("event platform forwarder not initialized")
+	}
+
 	m := message.NewMessage(rawEvent, nil, "", 0)
-	return s.eventForwarder.SendEventPlatformEvent(m, epforwarder.EventTypeContainerSBOM)
+	return eventPlatform.SendEventPlatformEvent(m, eventplatform.EventTypeContainerSBOM)
 }
 
 func (s *Runner) sendFindings(findings []*types.ScanFinding) {

Motivation

Additional Notes

Possible Drawbacks / Trade-offs

Describe how to test/QA your changes

Mar 06 '24 13:03 0intro

Bloop Bleep... Dogbot Here

Regression Detector Results

Run ID: 3c1210c8-6fdc-4859-a10a-9cf9db0ff617 Baseline: 9923959a6525bc1dd4576ec3a682daec6e717f5a Comparison: 6d178890329e81a400b51d709e607d2c8220b83d

Performance changes are noted in the perf column of each table:

✅ = significantly better comparison variant performance
❌ = significantly worse comparison variant performance
➖ = no significant change in performance

No significant changes in experiment optimization goals

Confidence level: 90.00% Effect size tolerance: |Δ mean %| ≥ 5.00%

There were no significant changes in experiment optimization goals at this confidence level and effect size tolerance.

Experiments ignored for regressions

Regressions in experiments with settings containing erratic: true are ignored.

perf	experiment	goal	Δ mean %	Δ mean % CI
➖	file_to_blackhole	% cpu utilization	+0.64	[-5.96, +7.25]

Fine details of change detection per experiment

perf	experiment	goal	Δ mean %	Δ mean % CI
➖	basic_py_check	% cpu utilization	+1.36	[-0.90, +3.63]
➖	file_to_blackhole	% cpu utilization	+0.64	[-5.96, +7.25]
➖	tcp_syslog_to_blackhole	ingress throughput	+0.05	[-0.01, +0.10]
➖	trace_agent_json	ingress throughput	+0.01	[-0.02, +0.03]
➖	trace_agent_msgpack	ingress throughput	+0.01	[-0.01, +0.02]
➖	tcp_dd_logs_filter_exclude	ingress throughput	+0.00	[-0.00, +0.00]
➖	uds_dogstatsd_to_api	ingress throughput	-0.00	[-0.00, +0.00]
➖	otel_to_otel_logs	ingress throughput	-0.09	[-0.72, +0.54]
➖	process_agent_standard_check_with_stats	memory utilization	-0.13	[-0.16, -0.09]
➖	file_tree	memory utilization	-0.18	[-0.25, -0.10]
➖	idle	memory utilization	-0.25	[-0.29, -0.22]
➖	process_agent_standard_check	memory utilization	-0.65	[-0.68, -0.61]
➖	process_agent_real_time_mode	memory utilization	-0.66	[-0.70, -0.63]
➖	uds_dogstatsd_to_api_cpu	% cpu utilization	-0.79	[-2.21, +0.64]

Explanation

A regression test is an A/B test of target performance in a repeatable rig, where "performance" is measured as "comparison variant minus baseline variant" for an optimization goal (e.g., ingress throughput). Due to intrinsic variability in measuring that goal, we can only estimate its mean value for each experiment; we report uncertainty in that value as a 90.00% confidence interval denoted "Δ mean % CI".

For each experiment, we decide whether a change in performance is a "regression" -- a change worth investigating further -- if all of the following criteria are true:

Its estimated |Δ mean %| ≥ 5.00%, indicating the change is big enough to merit a closer look.
Its 90.00% confidence interval "Δ mean % CI" does not contain zero, indicating that if our statistical model is accurate, there is at least a 90.00% chance there is a difference in performance between baseline and comparison variants.
Its configuration does not mark it "erratic".

Mar 06 '24 14:03 pr-commenter[bot]

Hi @DataDog/documentation. I've updated the release notes to add a description of the agentless-scanner. Could you please take another look and let me know if it looks good to you? Thanks.

Mar 08 '24 11:03 0intro

Also, side question: this looks like a code import from another repo (8k new lines in a single commit).

If that's the case, could we keep the code history from the original repo?

This is an import from the following branch: jinroh/side-scanner (based on 7.51.x branch).

I'd love to import the commit history into this PR (or rather a cleaned-up history); however, it will unfortunately be squashed into a single commit by the now-mandatory /merge command.

Of course, we plan to keep the development history in a branch called ducolombier/agentless-scanner-7.51 or something like that.

Mar 11 '24 15:03 0intro

Also, side question: this looks like a code import from another repo (8k new lines in a single commit). If that's the case, could we keep the code history from the original repo?

This is an import from the following branch: jinroh/side-scanner (based on 7.51.x branch).

I'd love to import the commit history into this PR (or rather a cleaned-up history); however, it will unfortunately be squashed into a single commit by the now-mandatory /merge command.

Of course, we plan to keep the development history in a branch called ducolombier/agentless-scanner-7.51 or something like that.

You can use /merge -c rebase to use the merge queue with a rebase strategy.

Mar 11 '24 16:03 hush-hush

I'd like to clarify that the agentless-scanner command is a bit different than the other existing commands, because it's autonomous and very independent from the rest of the agent:

It only depends on the config, log and remoteconfig components from the agent.
It's packaged independently from the other commands, in its own datadog-agentless-scanner RPM or DEB package.
Technically, it could live in its own repository, but we chose to integrate it into the datadog-agent repository, so we could reuse the existing CI and release management process.

Mar 11 '24 16:03 0intro

I'm having trouble getting rid of pkg/config entirely, because, for example, I couldn't find any alternative to the pkgconfig.GetBindHost function, and I couldn't find any example of replacing this function anywhere in the datadog-agent repository.

Mar 13 '24 13:03 0intro

Test changes on VM

Use this command from test-infra-definitions to manually test this PR's changes on a VM:

inv create-vm --pipeline-id=30440723 --os-family=ubuntu

Mar 15 '24 15:03 pr-commenter[bot]