[sdk] Google Cloud Pipeline Components create_custom_training_job_from_component does not respect IfPresentPlaceholder
Environment
- KFP version: Vertex AI Pipelines
- KFP SDK version:
- All dependencies version: kfp 2.15.1 kfp-pipeline-spec 2.15.1 kfp-server-api 2.15.1 google-cloud-pipeline-components 2.22.0
Steps to reproduce
Let us take a look at this pipeline:
from typing import Optional
import kfp
from google_cloud_pipeline_components.v1.custom_job.utils import (
create_custom_training_job_from_component,
)
from kfp import compiler, dsl
from kfp.dsl import Input
# Container component for the reproduction: runs `echo` in an alpine image.
# Its arguments are selected by IfPresentPlaceholder depending on whether the
# optional input `optional_name` was supplied by the caller.
# NOTE(review): with `sh -c`, `$0` is the first argument after the script
# string, which here is the literal `--name` -- confirm this is intended.
@dsl.container_component
def say_hello(optional_name: Optional[str] = None):
    return dsl.ContainerSpec(
        image="alpine",
        command=["sh", "-xc", "echo Hello, $0!"],
        args=[
            # `then` applies when `optional_name` is provided, `else_` otherwise.
            dsl.IfPresentPlaceholder(
                input_name="optional_name",
                then=["--name", optional_name],  # pyright: ignore[reportArgumentType]
                else_=["--name", "friend"],
            )
        ],
    )
def create_kubeflow_pipeline(
    project,
    location,
    staging_bucket,
    display_name,
    service_account,
    tensorboard_resource_name,
):
    """Define a one-step pipeline around ``say_hello`` and compile it to YAML.

    Args:
        project: Not used by this reproduction.
        location: Not used by this reproduction.
        staging_bucket: Bucket URI; the pipeline root is set to
            ``{staging_bucket}/pipeline_root``.
        display_name: Pipeline name, and stem of the output file
            ``{display_name}.yaml``.
        service_account: Not used by this reproduction.
        tensorboard_resource_name: Not used by this reproduction.
    """

    @dsl.pipeline(
        pipeline_root=f"{staging_bucket}/pipeline_root",
        name=display_name,
    )
    def pipeline(
        name: Optional[str] = None,
    ):
        # Forward the optional pipeline parameter straight to the component.
        say_hello(
            optional_name=name,
        )

    compiler.Compiler().compile(
        pipeline_func=pipeline,  # pyright: ignore[reportArgumentType]
        package_path=f"{display_name}.yaml",
    )


# Variant 1: plain container component, `optional_name` wired to the
# pipeline parameter.
create_kubeflow_pipeline(
    project="some_project",
    location="some_location",
    staging_bucket="gs://some-staging-bucket",
    display_name="bug-report",
    service_account="[email protected]",
    tensorboard_resource_name="projects/some-project/locations/some-location/tensorboards/000000000000000",
)
This compiles and works perfectly; the generated YAML is attached as bug-report01.yaml.
I can pass any name in Vertex AI and the generated pipeline executes correctly. Now for the second pipeline:
from typing import Optional
import kfp
from google_cloud_pipeline_components.v1.custom_job.utils import (
create_custom_training_job_from_component,
)
from kfp import compiler, dsl
from kfp.dsl import Input
# Container component for the reproduction: runs `echo` in an alpine image.
# Its arguments are selected by IfPresentPlaceholder depending on whether the
# optional input `optional_name` was supplied by the caller.
# NOTE(review): with `sh -c`, `$0` is the first argument after the script
# string, which here is the literal `--name` -- confirm this is intended.
@dsl.container_component
def say_hello(optional_name: Optional[str] = None):
    return dsl.ContainerSpec(
        image="alpine",
        command=["sh", "-xc", "echo Hello, $0!"],
        args=[
            # `then` applies when `optional_name` is provided, `else_` otherwise.
            dsl.IfPresentPlaceholder(
                input_name="optional_name",
                then=["--name", optional_name],  # pyright: ignore[reportArgumentType]
                else_=["--name", "friend"],
            )
        ],
    )
def create_kubeflow_pipeline(
    project,
    location,
    staging_bucket,
    display_name,
    service_account,
    tensorboard_resource_name,
):
    """Compile a one-step pipeline that calls ``say_hello`` with no arguments.

    Args:
        project: Not used by this reproduction.
        location: Not used by this reproduction.
        staging_bucket: Bucket URI; the pipeline root is set to
            ``{staging_bucket}/pipeline_root``.
        display_name: Pipeline name, and stem of the output file
            ``{display_name}.yaml``.
        service_account: Not used by this reproduction.
        tensorboard_resource_name: Not used by this reproduction.
    """

    @dsl.pipeline(
        pipeline_root=f"{staging_bucket}/pipeline_root",
        name=display_name,
    )
    def pipeline(
        name: Optional[str] = None,
    ):
        # The argument is deliberately left commented out so the component
        # receives no value for `optional_name`.
        say_hello(
            # optional_name=name,
        )

    compiler.Compiler().compile(
        pipeline_func=pipeline,  # pyright: ignore[reportArgumentType]
        package_path=f"{display_name}.yaml",
    )


# Variant 2: plain container component, `optional_name` not passed at all.
create_kubeflow_pipeline(
    project="some_project",
    location="some_location",
    staging_bucket="gs://some-staging-bucket",
    display_name="bug-report02",
    service_account="[email protected]",
    tensorboard_resource_name="projects/some-project/locations/some-location/tensorboards/000000000000000",
)
As expected (see bug-report02.yaml), this also works: since `optional_name` is None, the `else_` branch is used and the value `friend` is passed.
Now, if we use:
from google_cloud_pipeline_components.v1.custom_job.utils import (
create_custom_training_job_from_component,
)
that is, with the following pipeline:
from typing import Optional
import kfp
from google_cloud_pipeline_components.v1.custom_job.utils import (
create_custom_training_job_from_component,
)
from kfp import compiler, dsl
# Container component for the reproduction: runs `echo` in an alpine image.
# Its arguments are selected by IfPresentPlaceholder depending on whether the
# optional input `optional_name` was supplied by the caller.
# NOTE(review): with `sh -c`, `$0` is the first argument after the script
# string, which here is the literal `--name` -- confirm this is intended.
@dsl.container_component
def say_hello(optional_name: Optional[str] = None):
    return dsl.ContainerSpec(
        image="alpine",
        command=["sh", "-xc", "echo Hello, $0!"],
        args=[
            # `then` applies when `optional_name` is provided, `else_` otherwise.
            dsl.IfPresentPlaceholder(
                input_name="optional_name",
                then=["--name", optional_name],  # pyright: ignore[reportArgumentType]
                else_=["--name", "friend"],
            )
        ],
    )
def create_kubeflow_pipeline(
    project,
    location,
    staging_bucket,
    display_name,
    service_account,
):
    """Compile a pipeline that runs ``say_hello`` wrapped as a custom job.

    The component is wrapped with
    ``create_custom_training_job_from_component`` before being called; this
    is the variant that shows the reported behaviour.

    Args:
        project: Not used by this reproduction.
        location: Not used by this reproduction.
        staging_bucket: Bucket URI; used for the pipeline root
            (``{staging_bucket}/pipeline_root``) and passed as the custom
            job's ``base_output_directory``.
        display_name: Pipeline name, and stem of the output file
            ``{display_name}.yaml``.
        service_account: Passed as the custom job's ``service_account``.
    """

    @dsl.pipeline(
        pipeline_root=f"{staging_bucket}/pipeline_root",
        name=display_name,
    )
    def pipeline(
        name: Optional[str] = None,
    ):
        # Wrap the container component as a custom training job component.
        custom_hello = create_custom_training_job_from_component(
            component_spec=say_hello,
            machine_type="e2-standard-4",
            base_output_directory=staging_bucket,
            service_account=service_account,
        )
        # Same wiring as the first variant, but through the wrapped component.
        custom_hello(
            optional_name=name,
        )

    compiler.Compiler().compile(
        pipeline_func=pipeline,  # pyright: ignore[reportArgumentType]
        package_path=f"{display_name}.yaml",
    )


# Variant 3: the component wrapped by create_custom_training_job_from_component.
create_kubeflow_pipeline(
    project="some_project",
    location="some_location",
    staging_bucket="gs://some-staging-bucket",
    display_name="bug-report03",
    service_account="[email protected]",
)
This does not behave correctly (see bug-report03.yml), because when executing the pipeline we get:
Expected result
The create_custom_training_job_from_component function should also resolve the placeholder correctly, and the pipeline should execute the same way on Vertex AI Pipelines.
Materials and Reference
It seems this was also reported against the Google AI Platform SDK; that issue was opened last year, and it was requested that it be moved here.
Impacted by this bug? Give it a 👍.