feat: Add new gcloud commands, API clients, and third-party libraries across various services.

This commit is contained in:
2026-01-01 20:26:35 +01:00
parent 5e23cbece0
commit a19e592eb7
25221 changed files with 8324611 additions and 0 deletions

View File

@@ -0,0 +1,40 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Command group for Vertex AI endpoints."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import base
@base.ReleaseTracks(base.ReleaseTrack.ALPHA, base.ReleaseTrack.BETA,
                    base.ReleaseTrack.GA)
@base.UniverseCompatible
class Endpoint(base.Group):
  """Manage Vertex AI endpoints.

  An endpoint contains one or more deployed models, all of which must have
  the same interface but may come from different models.

  An endpoint is to obtain online prediction and explanation from one of
  its deployed models.

  When you communicate with Vertex AI services, you identify a specific
  endpoint that is deployed in the cloud using a combination of the current
  project, the region, and the endpoint.
  """

  # Places this command group under the Vertex AI section of `gcloud` help.
  category = base.VERTEX_AI_CATEGORY

View File

@@ -0,0 +1,152 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Vertex AI endpoints create command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from apitools.base.py import encoding
from googlecloudsdk.api_lib.ai import operations
from googlecloudsdk.api_lib.ai.endpoints import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ai import constants
from googlecloudsdk.command_lib.ai import endpoint_util
from googlecloudsdk.command_lib.ai import endpoints_util
from googlecloudsdk.command_lib.ai import flags
from googlecloudsdk.command_lib.ai import operations_util
from googlecloudsdk.command_lib.ai import region_util
from googlecloudsdk.command_lib.ai import validation
from googlecloudsdk.command_lib.util.args import labels_util
from googlecloudsdk.core import log
def _AddArgsGa(parser):
  """Registers the GA flag set for `endpoints create` on *parser*.

  Args:
    parser: argparse parser to which the flags are added.
  """
  flags.GetDisplayNameArg('endpoint').AddToParser(parser)
  flags.AddRegionResourceArg(
      parser, 'to create endpoint', prompt_func=region_util.PromptForOpRegion)
  flags.GetDescriptionArg('endpoint').AddToParser(parser)
  flags.GetUserSpecifiedIdArg('endpoint').AddToParser(parser)
  labels_util.AddCreateLabelsFlags(parser)
  flags.GetEndpointNetworkArg().AddToParser(parser)
  flags.GetEncryptionKmsKeyNameArg().AddToParser(parser)
  flags.GetHiddenGdceZoneArg().AddToParser(parser)
  flags.AddRequestResponseLoggingConfigGroupArgs(parser)
def _AddArgs(parser):
  """Registers the Beta/Alpha flag set for `endpoints create` on *parser*.

  Same as the GA set plus the (visible) --gdc-zone flag.

  Args:
    parser: argparse parser to which the flags are added.
  """
  flags.GetDisplayNameArg('endpoint').AddToParser(parser)
  flags.AddRegionResourceArg(
      parser, 'to create endpoint', prompt_func=region_util.PromptForOpRegion)
  flags.GetDescriptionArg('endpoint').AddToParser(parser)
  flags.GetUserSpecifiedIdArg('endpoint').AddToParser(parser)
  labels_util.AddCreateLabelsFlags(parser)
  flags.GetEndpointNetworkArg().AddToParser(parser)
  flags.GetEncryptionKmsKeyNameArg().AddToParser(parser)
  flags.GetHiddenGdceZoneArg().AddToParser(parser)
  flags.GetGdcZoneArg().AddToParser(parser)
  flags.AddRequestResponseLoggingConfigGroupArgs(parser)
def _Run(args, version):
  """Create a new Vertex AI endpoint.

  Args:
    args: argparse.Namespace of parsed command-line arguments.
    version: str, the aiplatform API version to use.

  Returns:
    The operation response message, or None if the operation was not waited
    for.
  """
  validation.ValidateDisplayName(args.display_name)
  region_ref = args.CONCEPTS.region.Parse()
  args.region = region_ref.AsDict()['locationsId']
  with endpoint_util.AiplatformEndpointOverrides(version, region=args.region):
    api_client = client.EndpointsClient(version=version)
    ops_client = operations.OperationsClient()
    if version == constants.GA_VERSION:
      # The labels message class differs between API versions.
      labels = labels_util.ParseCreateArgs(
          args,
          api_client.messages.GoogleCloudAiplatformV1Endpoint.LabelsValue)
      operation = api_client.Create(
          region_ref,
          args.display_name,
          labels,
          description=args.description,
          network=args.network,
          endpoint_id=args.endpoint_id,
          encryption_kms_key_name=args.encryption_kms_key_name,
          request_response_logging_table=args.request_response_logging_table,
          request_response_logging_rate=args.request_response_logging_rate)
    else:
      labels = labels_util.ParseCreateArgs(
          args,
          api_client.messages.GoogleCloudAiplatformV1beta1Endpoint.LabelsValue)
      operation = api_client.CreateBeta(
          region_ref,
          args.display_name,
          labels,
          description=args.description,
          network=args.network,
          endpoint_id=args.endpoint_id,
          encryption_kms_key_name=args.encryption_kms_key_name,
          gdce_zone=args.gdce_zone,
          gdc_zone=args.gdc_zone,
          request_response_logging_table=args.request_response_logging_table,
          request_response_logging_rate=args.request_response_logging_rate)
    result = operations_util.WaitForOpMaybe(
        ops_client, operation, endpoints_util.ParseOperation(operation.name))
    if result is not None:
      result_dict = encoding.MessageToPyValue(result)
      if 'name' in result_dict:
        log.status.Print(
            'Created Vertex AI endpoint: {}.'.format(result_dict['name']))
    return result
@base.ReleaseTracks(base.ReleaseTrack.GA)
@base.UniverseCompatible
class CreateGa(base.CreateCommand):
  """Create a new Vertex AI endpoint.

  ## EXAMPLES

  To create an endpoint under project ``example'' in region ``us-central1'',
  run:

    $ {command} --project=example --region=us-central1
    --display-name=my_endpoint
  """

  @staticmethod
  def Args(parser):
    # GA flag set (no visible --gdc-zone flag).
    _AddArgsGa(parser)

  def Run(self, args):
    return _Run(args, constants.GA_VERSION)
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.ALPHA)
@base.UniverseCompatible
class CreateBeta(base.CreateCommand):
  """Create a new Vertex AI endpoint.

  ## EXAMPLES

  To create an endpoint under project ``example'' in region ``us-central1'',
  run:

    $ {command} --project=example --region=us-central1
    --display-name=my_endpoint
  """

  @staticmethod
  def Args(parser):
    # Beta/Alpha flag set (adds --gdc-zone on top of the GA flags).
    _AddArgs(parser)

  def Run(self, args):
    return _Run(args, constants.BETA_VERSION)

View File

@@ -0,0 +1,85 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Vertex AI endpoints delete command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.ai import operations
from googlecloudsdk.api_lib.ai.endpoints import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ai import constants
from googlecloudsdk.command_lib.ai import endpoint_util
from googlecloudsdk.command_lib.ai import endpoints_util
from googlecloudsdk.command_lib.ai import flags
from googlecloudsdk.command_lib.ai import operations_util
from googlecloudsdk.command_lib.ai import region_util
from googlecloudsdk.core.console import console_io
def _Run(args, version):
  """Delete an existing Vertex AI endpoint.

  Prompts for confirmation before issuing the delete request.

  Args:
    args: argparse.Namespace of parsed command-line arguments.
    version: str, the aiplatform API version to use.

  Returns:
    The operation response message, or None if the operation was not waited
    for.
  """
  endpoint_ref = args.CONCEPTS.endpoint.Parse()
  ref_parts = endpoint_ref.AsDict()
  args.region = ref_parts['locationsId']
  endpoint_id = ref_parts['endpointsId']
  with endpoint_util.AiplatformEndpointOverrides(version, region=args.region):
    api_client = client.EndpointsClient(version=version)
    ops_client = operations.OperationsClient()
    # Aborts the command when the user answers "no".
    console_io.PromptContinue(
        'This will delete endpoint [{}]...'.format(endpoint_id),
        cancel_on_no=True)
    delete_op = api_client.Delete(endpoint_ref)
    return operations_util.WaitForOpMaybe(
        ops_client, delete_op, endpoints_util.ParseOperation(delete_op.name))
@base.ReleaseTracks(base.ReleaseTrack.GA)
@base.UniverseCompatible
class DeleteGa(base.DeleteCommand):
  """Delete an existing Vertex AI endpoint.

  ## EXAMPLES

  To delete an endpoint ``123'' under project ``example'' in region
  ``us-central1'', run:

    $ {command} 123 --project=example --region=us-central1
  """

  @staticmethod
  def Args(parser):
    flags.AddEndpointResourceArg(
        parser, 'to delete', prompt_func=region_util.PromptForOpRegion)

  def Run(self, args):
    return _Run(args, constants.GA_VERSION)
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.ALPHA)
@base.UniverseCompatible
class DeleteBeta(DeleteGa):
  """Delete an existing Vertex AI endpoint.

  ## EXAMPLES

  To delete an endpoint ``123'' under project ``example'' in region
  ``us-central1'', run:

    $ {command} 123 --project=example --region=us-central1
  """

  # Inherits Args from DeleteGa; only the API version differs.
  def Run(self, args):
    return _Run(args, constants.BETA_VERSION)

View File

@@ -0,0 +1,184 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Vertex AI endpoints deploy-model command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from apitools.base.py import encoding
from googlecloudsdk.api_lib.ai import operations
from googlecloudsdk.api_lib.ai.endpoints import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ai import constants
from googlecloudsdk.command_lib.ai import endpoint_util
from googlecloudsdk.command_lib.ai import endpoints_util
from googlecloudsdk.command_lib.ai import flags
from googlecloudsdk.command_lib.ai import operations_util
from googlecloudsdk.command_lib.ai import region_util
from googlecloudsdk.command_lib.ai import validation
from googlecloudsdk.core import log
def _AddArgs(parser, version):
  """Prepares for the arguments of the command.

  Args:
    parser: argparse parser to which the flags are added.
    version: str, the aiplatform API version; non-GA tracks additionally get
      the shared-resources argument.
  """
  flags.GetDisplayNameArg('deployed model').AddToParser(parser)
  flags.GetTrafficSplitArg().AddToParser(parser)
  flags.AddPredictionResourcesArgs(parser, version)
  flags.AddScaleToZeroArgs(parser, version)
  flags.GetEnableAccessLoggingArg().AddToParser(parser)
  flags.GetServiceAccountArg().AddToParser(parser)
  flags.GetUserSpecifiedIdArg('deployed-model').AddToParser(parser)
  flags.GetAutoscalingMetricSpecsArg().AddToParser(parser)
  flags.AddModelIdArg(version, parser)
  flags.AddEndpointResourceArg(
      parser,
      'to deploy a model to',
      prompt_func=region_util.PromptForOpRegion)
  if version != constants.GA_VERSION:
    # Co-hosting on shared deployment resources is Beta/Alpha only.
    flags.AddSharedResourcesArg(
        parser,
        'to co-host a model on')
def _Run(args, version):
  """Deploy a model to an existing Vertex AI endpoint.

  Args:
    args: argparse.Namespace of parsed command-line arguments.
    version: str, the aiplatform API version to use.

  Returns:
    The operation response message, or None if the operation was not waited
    for.
  """
  validation.ValidateDisplayName(args.display_name)
  validation.ValidateAutoscalingMetricSpecs(args.autoscaling_metric_specs)
  validation.ValidateRequiredReplicaCount(args.required_replica_count,
                                          args.min_replica_count)
  validation.ValidateGpuPartitionSize(args.gpu_partition_size)
  endpoint_ref = args.CONCEPTS.endpoint.Parse()
  # The endpoint's location selects the regional service endpoint below.
  args.region = endpoint_ref.AsDict()['locationsId']
  with endpoint_util.AiplatformEndpointOverrides(version, region=args.region):
    endpoints_client = client.EndpointsClient(version=version)
    operation_client = operations.OperationsClient()
    if version == constants.GA_VERSION:
      op = endpoints_client.DeployModel(
          endpoint_ref,
          args.model,
          args.region,
          args.display_name,
          machine_type=args.machine_type,
          accelerator_dict=args.accelerator,
          min_replica_count=args.min_replica_count,
          max_replica_count=args.max_replica_count,
          required_replica_count=args.required_replica_count,
          reservation_affinity=args.reservation_affinity,
          autoscaling_metric_specs=args.autoscaling_metric_specs,
          spot=args.spot,
          enable_access_logging=args.enable_access_logging,
          disable_container_logging=args.disable_container_logging,
          service_account=args.service_account,
          traffic_split=args.traffic_split,
          deployed_model_id=args.deployed_model_id,
          gpu_partition_size=args.gpu_partition_size,
      )
    else:
      # Beta/Alpha additionally supports co-hosting on shared resources and
      # scale-to-zero settings; both argument groups are validated first.
      shared_resources_ref = args.CONCEPTS.shared_resources.Parse()
      validation.ValidateSharedResourceArgs(
          shared_resources_ref=shared_resources_ref,
          machine_type=args.machine_type,
          accelerator_dict=args.accelerator,
          min_replica_count=args.min_replica_count,
          max_replica_count=args.max_replica_count,
          required_replica_count=args.required_replica_count,
          autoscaling_metric_specs=args.autoscaling_metric_specs)
      validation.ValidateScaleToZeroArgs(
          args.min_replica_count,
          args.initial_replica_count, args.max_replica_count,
          args.min_scaleup_period, args.idle_scaledown_period
      )
      op = endpoints_client.DeployModelBeta(
          endpoint_ref,
          args.model,
          args.region,
          args.display_name,
          machine_type=args.machine_type,
          tpu_topology=args.tpu_topology,
          multihost_gpu_node_count=args.multihost_gpu_node_count,
          accelerator_dict=args.accelerator,
          min_replica_count=args.min_replica_count,
          max_replica_count=args.max_replica_count,
          required_replica_count=args.required_replica_count,
          reservation_affinity=args.reservation_affinity,
          autoscaling_metric_specs=args.autoscaling_metric_specs,
          spot=args.spot,
          enable_access_logging=args.enable_access_logging,
          enable_container_logging=args.enable_container_logging,
          service_account=args.service_account,
          traffic_split=args.traffic_split,
          deployed_model_id=args.deployed_model_id,
          shared_resources_ref=shared_resources_ref,
          min_scaleup_period=args.min_scaleup_period,
          idle_scaledown_period=args.idle_scaledown_period,
          initial_replica_count=args.initial_replica_count,
          gpu_partition_size=args.gpu_partition_size)
    response_msg = operations_util.WaitForOpMaybe(
        operation_client, op, endpoints_util.ParseOperation(op.name))
    if response_msg is not None:
      response = encoding.MessageToPyValue(response_msg)
      # Surface the server-assigned deployed-model id to the user.
      if 'deployedModel' in response and 'id' in response['deployedModel']:
        log.status.Print(('Deployed a model to the endpoint {}. '
                          'Id of the deployed model: {}.').format(
                              endpoint_ref.AsDict()['endpointsId'],
                              response['deployedModel']['id']))
    return response_msg
@base.ReleaseTracks(base.ReleaseTrack.GA)
@base.UniverseCompatible
class DeployModelGa(base.Command):
  """Deploy a model to an existing Vertex AI endpoint.

  ## EXAMPLES

  To deploy a model ``456'' to an endpoint ``123'' under project ``example'' in
  region ``us-central1'', run:

    $ {command} 123 --project=example --region=us-central1 --model=456
    --display-name=my_deployed_model
  """

  @staticmethod
  def Args(parser):
    _AddArgs(parser, constants.GA_VERSION)
    # GA exposes --disable-container-logging (container logging on by
    # default); Beta exposes the inverse flag instead.
    flags.GetDisableContainerLoggingArg().AddToParser(parser)

  def Run(self, args):
    # NOTE(review): _Run's result is intentionally not returned here (only
    # the log message is printed) — confirm this asymmetry with the other
    # endpoint commands is deliberate.
    _Run(args, constants.GA_VERSION)
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.ALPHA)
@base.UniverseCompatible
class DeployModelBeta(DeployModelGa):
  """Deploy a model to an existing Vertex AI endpoint.

  ## EXAMPLES

  To deploy a model ``456'' to an endpoint ``123'' under project ``example'' in
  region ``us-central1'', run:

    $ {command} 123 --project=example --region=us-central1 --model=456
    --display-name=my_deployed_model
  """

  @staticmethod
  def Args(parser):
    _AddArgs(parser, constants.BETA_VERSION)
    # Beta uses --enable-container-logging (inverse of the GA flag).
    flags.GetEnableContainerLoggingArg().AddToParser(parser)

  def Run(self, args):
    _Run(args, constants.BETA_VERSION)

View File

@@ -0,0 +1,72 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Vertex AI endpoints describe command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.ai.endpoints import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ai import constants
from googlecloudsdk.command_lib.ai import endpoint_util
from googlecloudsdk.command_lib.ai import flags
from googlecloudsdk.command_lib.ai import region_util
def _Run(args, version):
  """Fetches the endpoint resource referenced by the command arguments."""
  endpoint_ref = args.CONCEPTS.endpoint.Parse()
  args.region = endpoint_ref.AsDict()['locationsId']
  with endpoint_util.AiplatformEndpointOverrides(version, region=args.region):
    api_client = client.EndpointsClient(version=version)
    return api_client.Get(endpoint_ref)
@base.ReleaseTracks(base.ReleaseTrack.GA)
@base.UniverseCompatible
class DescribeGa(base.DescribeCommand):
  """Describe an existing Vertex AI endpoint.

  ## EXAMPLES

  To describe an endpoint ``123'' under project ``example'' in region
  ``us-central1'', run:

    $ {command} 123 --project=example --region=us-central1
  """

  @staticmethod
  def Args(parser):
    flags.AddEndpointResourceArg(
        parser, 'to describe', prompt_func=region_util.PromptForOpRegion)

  def Run(self, args):
    return _Run(args, constants.GA_VERSION)
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.ALPHA)
@base.UniverseCompatible
class DescribeBeta(DescribeGa):
  """Describe an existing Vertex AI endpoint.

  ## EXAMPLES

  To describe an endpoint ``123'' under project ``example'' in region
  ``us-central1'', run:

    $ {command} 123 --project=example --region=us-central1
  """

  # Inherits Args from DescribeGa; only the API version differs.
  def Run(self, args):
    return _Run(args, constants.BETA_VERSION)

View File

@@ -0,0 +1,105 @@
# -*- coding: utf-8 -*- #
# Copyright 2024 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Vertex AI endpoints direct predict command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.ai.endpoints import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ai import constants
from googlecloudsdk.command_lib.ai import endpoint_util
from googlecloudsdk.command_lib.ai import endpoints_util
from googlecloudsdk.command_lib.ai import flags
from googlecloudsdk.command_lib.ai import region_util
def _AddArgs(parser):
  """Registers the flags for `endpoints direct-predict` on *parser*."""
  flags.AddEndpointResourceArg(
      parser,
      'to do online direct prediction',
      prompt_func=region_util.PromptForOpRegion,
  )
  flags.AddDirectPredictInputsArg(parser)
def _Run(args, version):
  """Run Vertex AI online direct prediction.

  Args:
    args: argparse.Namespace of parsed command-line arguments.
    version: str, the aiplatform API version to use.

  Returns:
    The direct-predict response message.
  """
  endpoint_ref = args.CONCEPTS.endpoint.Parse()
  args.region = endpoint_ref.AsDict()['locationsId']
  with endpoint_util.AiplatformEndpointOverrides(
      version, region=args.region, is_prediction=True):
    api_client = client.EndpointsClient(version=version)
    inputs_json = endpoints_util.ReadInputsFromArgs(args.json_request)
    if version == constants.GA_VERSION:
      response = api_client.DirectPredict(endpoint_ref, inputs_json)
    else:
      response = api_client.DirectPredictBeta(endpoint_ref, inputs_json)
    if not args.IsSpecified('format'):
      # Choose a default display format based on the response shape.
      args.format = endpoints_util.GetDefaultFormat(
          response.outputs, key_name='outputs')
    return response
@base.UniverseCompatible
@base.ReleaseTracks(base.ReleaseTrack.GA)
class DirectPredictGa(base.Command):
  """Run Vertex AI online direct prediction.

  `{command}` sends a direct prediction request to Vertex AI endpoint for the
  given instances. The request limit is 10MB.

  ## EXAMPLES

  To direct predict against an endpoint ``123'' under project ``example'' in
  region ``us-central1'', run:

    $ {command} 123 --project=example --region=us-central1
    --json-request=input.json
  """

  @staticmethod
  def Args(parser):
    _AddArgs(parser)

  def Run(self, args):
    return _Run(args, constants.GA_VERSION)
@base.UniverseCompatible
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.ALPHA)
class DirectPredictBeta(DirectPredictGa):
  """Run Vertex AI online direct prediction.

  `{command}` sends a direct prediction request to Vertex AI endpoint for the
  given instances. The request limit is 10MB.

  ## EXAMPLES

  To direct predict against an endpoint ``123'' under project ``example'' in
  region ``us-central1'', run:

    $ {command} 123 --project=example --region=us-central1
    --json-request=input.json
  """

  # Inherits Args from DirectPredictGa; only the API version differs.
  def Run(self, args):
    return _Run(args, constants.BETA_VERSION)

View File

@@ -0,0 +1,113 @@
# -*- coding: utf-8 -*- #
# Copyright 2024 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Vertex AI endpoints direct raw predict command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import base64
from googlecloudsdk.api_lib.ai.endpoints import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ai import constants
from googlecloudsdk.command_lib.ai import endpoint_util
from googlecloudsdk.command_lib.ai import endpoints_util
from googlecloudsdk.command_lib.ai import flags
from googlecloudsdk.command_lib.ai import region_util
def _AddArgs(parser):
  """Registers the flags for `endpoints direct-raw-predict` on *parser*."""
  flags.AddEndpointResourceArg(
      parser,
      'to do online direct raw prediction',
      prompt_func=region_util.PromptForOpRegion,
  )
  flags.AddDirectRawPredictInputArg(parser)
def _Run(args, version):
  """Run Vertex AI online direct raw prediction.

  Args:
    args: argparse.Namespace of parsed command-line arguments.
    version: str, the aiplatform API version to use.

  Returns:
    The direct-raw-predict response message with its output decoded from
    base64.
  """
  endpoint_ref = args.CONCEPTS.endpoint.Parse()
  args.region = endpoint_ref.AsDict()['locationsId']
  with endpoint_util.AiplatformEndpointOverrides(
      version, region=args.region, is_prediction=True):
    api_client = client.EndpointsClient(version=version)
    request_json = endpoints_util.ReadInputFromArgs(args.json_request)
    # Undo the user's base64 encoding; the apitools client re-encodes the
    # bytes field on the wire.
    raw_input = base64.b64decode(request_json['input']).decode('utf-8')
    request_json['input'] = raw_input
    if version == constants.GA_VERSION:
      response = api_client.DirectRawPredict(endpoint_ref, request_json)
    else:
      response = api_client.DirectRawPredictBeta(endpoint_ref, request_json)
    # The service returns the output base64 encoded; decode it for display.
    response.output = base64.b64decode(response.output)
    if not args.IsSpecified('format'):
      # Choose a default display format based on the response shape.
      args.format = endpoints_util.GetDefaultFormat(
          response.output, key_name='output')
    return response
@base.UniverseCompatible
@base.ReleaseTracks(base.ReleaseTrack.GA)
class DirectRawPredictGa(base.Command):
  """Run Vertex AI online direct raw prediction.

  `{command}` sends a direct raw prediction request to Vertex AI endpoint for
  the given input. The request limit is 10MB.

  ## EXAMPLES

  To direct predict against an endpoint ``123'' under project ``example'' in
  region ``us-central1'', run:

    $ {command} 123 --project=example --region=us-central1
    --json-request=input.json
  """

  @staticmethod
  def Args(parser):
    _AddArgs(parser)

  def Run(self, args):
    return _Run(args, constants.GA_VERSION)
@base.UniverseCompatible
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.ALPHA)
class DirectRawPredictBeta(DirectRawPredictGa):
  """Run Vertex AI online direct raw prediction.

  `{command}` sends a direct raw prediction request to Vertex AI endpoint for
  the given input. The request limit is 10MB.

  ## EXAMPLES

  To direct raw predict against an endpoint ``123'' under project ``example'' in
  region ``us-central1'', run:

    $ {command} 123 --project=example --region=us-central1
    --json-request=input.json
  """

  # Inherits Args from DirectRawPredictGa; only the API version differs.
  def Run(self, args):
    return _Run(args, constants.BETA_VERSION)

View File

@@ -0,0 +1,106 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Vertex AI endpoints explain command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.ai.endpoints import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ai import constants
from googlecloudsdk.command_lib.ai import endpoint_util
from googlecloudsdk.command_lib.ai import endpoints_util
from googlecloudsdk.command_lib.ai import flags
from googlecloudsdk.command_lib.ai import region_util
from googlecloudsdk.core import log
def _Run(args, version):
  """Run Vertex AI online explanation.

  Args:
    args: argparse.Namespace of parsed command-line arguments.
    version: str, the aiplatform API version to use.

  Returns:
    The explain response message.
  """
  endpoint_ref = args.CONCEPTS.endpoint.Parse()
  args.region = endpoint_ref.AsDict()['locationsId']
  with endpoint_util.AiplatformEndpointOverrides(
      version, region=args.region, is_prediction=True):
    endpoints_client = client.EndpointsClient(version=version)
    instances_json = endpoints_util.ReadInstancesFromArgs(args.json_request)
    if version == constants.GA_VERSION:
      results = endpoints_client.Explain(endpoint_ref, instances_json, args)
    else:
      results = endpoints_client.ExplainBeta(endpoint_ref, instances_json, args)
    # Direct attribute access replaces the original
    # getattr(results, 'deployedModelId'): getattr with a constant literal
    # name and no default is equivalent and non-idiomatic.
    if results.deployedModelId is not None:
      log.status.Print(
          'Deployed model id to be used for explanation: {}'.format(
              results.deployedModelId))
    if not args.IsSpecified('format'):
      # default format is based on the response.
      args.format = endpoints_util.GetDefaultFormat(
          results, key_name='explanations')
    return results
@base.ReleaseTracks(base.ReleaseTrack.GA)
@base.UniverseCompatible
class ExplainGa(base.Command):
  """Request an online explanation from an Vertex AI endpoint.

  `{command}` sends an explanation request to the Vertex AI endpoint for
  the given instances. This command reads up to 100 instances, though the
  service itself accepts instances up to the payload limit size
  (currently, 1.5MB).

  ## EXAMPLES

  To send an explanation request to the endpoint for the json file,
  input.json, run:

    $ {command} ENDPOINT_ID --region=us-central1 --json-request=input.json
  """

  @staticmethod
  def Args(parser):
    flags.AddEndpointResourceArg(
        parser,
        'to request an online explanation',
        prompt_func=region_util.PromptForOpRegion)
    flags.AddPredictInstanceArg(parser)
    # Optional: explanation can target a specific deployed model.
    flags.GetDeployedModelId(required=False).AddToParser(parser)

  def Run(self, args):
    return _Run(args, constants.GA_VERSION)
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.ALPHA)
@base.UniverseCompatible
class ExplainBeta(ExplainGa):
  """Request an online explanation from an Vertex AI endpoint.

  `{command}` sends an explanation request to the Vertex AI endpoint for
  the given instances. This command reads up to 100 instances, though the
  service itself accepts instances up to the payload limit size
  (currently, 1.5MB).

  ## EXAMPLES

  To send an explanation request to the endpoint for the json file,
  input.json, run:

    $ {command} ENDPOINT_ID --region=us-central1 --json-request=input.json
  """

  # Inherits Args from ExplainGa; only the API version differs.
  def Run(self, args):
    return _Run(args, constants.BETA_VERSION)

View File

@@ -0,0 +1,132 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Vertex AI endpoints list command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.ai.endpoints import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ai import constants
from googlecloudsdk.command_lib.ai import endpoint_util
from googlecloudsdk.command_lib.ai import flags
from googlecloudsdk.command_lib.ai import region_util
from googlecloudsdk.core import resources
_DEFAULT_FORMAT = """
table(
name.basename():label=ENDPOINT_ID,
displayName,
deployedModels.yesno(yes=Yes).if(list_model_garden_endpoints_only):label=HAS_DEPLOYED_MODEL,
deployedModels[0].id.if(list_model_garden_endpoints_only):label=DEPLOYED_MODEL_ID
)
"""
_API_DEPLOY_FILTER = 'labels.mg-deploy:*'
_ONE_CLICK_DEPLOY_FILTER = 'labels.mg-one-click-deploy:*'
def _GetUri(endpoint):
  """Returns the self-link URI for a listed endpoint resource."""
  endpoint_ref = resources.REGISTRY.ParseRelativeName(
      endpoint.name, constants.ENDPOINTS_COLLECTION)
  return endpoint_ref.SelfLink()
def _AddArgs(parser):
  """Registers display settings and flags for `endpoints list` on *parser*."""
  parser.display_info.AddFormat(_DEFAULT_FORMAT)
  parser.display_info.AddUriFunc(_GetUri)
  flags.AddRegionResourceArg(
      parser, 'to list endpoints', prompt_func=region_util.PromptForOpRegion)
  parser.add_argument(
      '--list-model-garden-endpoints-only',
      action='store_true',
      default=False,
      required=False,
      help='Whether to only list endpoints related to Model Garden.',
  )
def _Run(args, version):
  """List existing Vertex AI endpoints.

  Args:
    args: argparse.Namespace of parsed command-line arguments.
    version: str, the aiplatform API version to use.

  Returns:
    An iterable of endpoint resources.
  """
  region_ref = args.CONCEPTS.region.Parse()
  args.region = region_ref.AsDict()['locationsId']
  with endpoint_util.AiplatformEndpointOverrides(version, region=args.region):
    api_client = client.EndpointsClient(version=version)
    if args.list_model_garden_endpoints_only:
      # Restrict results to endpoints carrying either Model Garden label.
      mg_filter = ' OR '.join([_API_DEPLOY_FILTER, _ONE_CLICK_DEPLOY_FILTER])
      return api_client.List(region_ref, mg_filter)
    if version == constants.BETA_VERSION:
      # Only the Beta surface exposes the --gdc-zone flag.
      return api_client.List(region_ref, gdc_zone=args.gdc_zone)
    return api_client.List(region_ref)
@base.ReleaseTracks(base.ReleaseTrack.GA)
@base.UniverseCompatible
class ListGa(base.ListCommand):
  """List existing Vertex AI endpoints.

  ## EXAMPLES

  To list the endpoints under project ``example'' in region ``us-central1'',
  run:

    $ {command} --project=example --region=us-central1

  To list the endpoints under project ``example'' in region ``us-central1''
  that are created from Model Garden, run:

    $ {command} --project=example --region=us-central1
    --list-model-garden-endpoints-only
  """

  @staticmethod
  def Args(parser):
    _AddArgs(parser)

  def Run(self, args):
    return _Run(args, constants.GA_VERSION)
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.ALPHA)
@base.UniverseCompatible
class ListBeta(base.ListCommand):
  """List existing Vertex AI endpoints.
  ## EXAMPLES
  To list the endpoints under project ``example'' in region ``us-central1'',
  run:
    $ {command} --project=example --region=us-central1
  To list the endpoints under project ``example'' in region ``us-central1''
  that are created from Model Garden, run:
    $ {command} --project=example --region=us-central1
      --list-model-garden-endpoints-only
  """
  @staticmethod
  def Args(parser):
    _AddArgs(parser)
    # --gdc-zone is beta/alpha only; _Run forwards it for BETA_VERSION.
    flags.GetGdcZoneArg().AddToParser(parser)
  def Run(self, args):
    # Both beta and alpha tracks use the v1beta1 API surface.
    return _Run(args, constants.BETA_VERSION)

View File

@@ -0,0 +1,105 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Vertex AI endpoints predict command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.ai.endpoints import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ai import constants
from googlecloudsdk.command_lib.ai import endpoint_util
from googlecloudsdk.command_lib.ai import endpoints_util
from googlecloudsdk.command_lib.ai import flags
from googlecloudsdk.command_lib.ai import region_util
def _AddArgs(parser):
  """Adds the endpoint resource argument and the prediction instances flag."""
  flags.AddEndpointResourceArg(
      parser, 'to do online prediction',
      prompt_func=region_util.PromptForOpRegion)
  flags.AddPredictInstanceArg(parser)
def _Run(args, version):
  """Sends an online prediction request and returns the response."""
  endpoint_ref = args.CONCEPTS.endpoint.Parse()
  args.region = endpoint_ref.AsDict()['locationsId']
  with endpoint_util.AiplatformEndpointOverrides(
      version, region=args.region, is_prediction=True):
    api = client.EndpointsClient(version=version)
    instances = endpoints_util.ReadInstancesFromArgs(args.json_request)
    # GA and pre-GA surfaces use different message types, hence two methods.
    predict = api.Predict if version == constants.GA_VERSION else api.PredictBeta
    results = predict(endpoint_ref, instances)
    if not args.IsSpecified('format'):
      # Choose a default output format based on the shape of the response.
      args.format = endpoints_util.GetDefaultFormat(results.predictions)
    return results
@base.ReleaseTracks(base.ReleaseTrack.GA)
@base.UniverseCompatible
class PredictGa(base.Command):
  """Run Vertex AI online prediction.
  `{command}` sends a prediction request to Vertex AI endpoint for the
  given instances. This command will read up to 100 instances, though the
  service itself will accept instances up to the payload limit size
  (currently, 1.5MB).
  ## EXAMPLES
  To predict against an endpoint ``123'' under project ``example'' in region
  ``us-central1'', run:
    $ {command} 123 --project=example --region=us-central1
      --json-request=input.json
  """
  @staticmethod
  def Args(parser):
    # Endpoint resource arg + --json-request instances flag.
    _AddArgs(parser)
  def Run(self, args):
    # Delegate to the shared implementation with the GA (v1) API.
    return _Run(args, constants.GA_VERSION)
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.ALPHA)
@base.UniverseCompatible
class PredictBeta(PredictGa):
  """Run Vertex AI online prediction.
  `{command}` sends a prediction request to Vertex AI endpoint for the
  given instances. This command will read up to 100 instances, though the
  service itself will accept instances up to the payload limit size
  (currently, 1.5MB).
  ## EXAMPLES
  To predict against an endpoint ``123'' under project ``example'' in region
  ``us-central1'', run:
    $ {command} 123 --project=example --region=us-central1
      --json-request=input.json
  """
  # Args is inherited from PredictGa; only the API version differs.
  def Run(self, args):
    # Both beta and alpha tracks use the v1beta1 API surface.
    return _Run(args, constants.BETA_VERSION)

View File

@@ -0,0 +1,145 @@
# -*- coding: utf-8 -*- #
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Vertex AI endpoints raw-predict command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import json
import sys
from googlecloudsdk.api_lib.ai.endpoints import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ai import constants
from googlecloudsdk.command_lib.ai import endpoint_util
from googlecloudsdk.command_lib.ai import flags
from googlecloudsdk.command_lib.ai import region_util
from googlecloudsdk.core import exceptions as core_exceptions
from googlecloudsdk.core.console import console_io
import six
def _AddArgs(parser):
  """Adds the endpoint resource argument plus raw-predict header/body flags."""
  flags.AddEndpointResourceArg(
      parser, 'to do online raw prediction',
      prompt_func=region_util.PromptForOpRegion)
  flags.GetRawPredictHeadersArg().AddToParser(parser)
  flags.GetRawPredictRequestArg().AddToParser(parser)
def _Run(args, version):
  """Runs Vertex AI online raw prediction and returns or prints the result."""
  endpoint_ref = args.CONCEPTS.endpoint.Parse()
  args.region = endpoint_ref.AsDict()['locationsId']
  with endpoint_util.AiplatformEndpointOverrides(version, region=args.region):
    # '@path' means read the body from a file ('@-' for stdin); otherwise the
    # flag value itself is the request body.
    if args.request.startswith('@'):
      body = console_io.ReadFromFileOrStdin(args.request[1:], binary=True)
    else:
      body = args.request.encode('utf-8')
    _, response = client.EndpointsClient(version=version).RawPredict(
        endpoint_ref, args.http_headers, body)
    if not args.IsSpecified('format'):
      # Workaround since gcloud only supports protobufs as JSON objects. Since
      # raw predict can return anything, write raw bytes to stdout.
      sys.stdout.buffer.write(response)
      return None
    # An explicit --format implies the caller expects a JSON body.
    try:
      return json.loads(response.decode('utf-8'))
    except ValueError:
      raise core_exceptions.Error('No JSON object could be decoded from the '
                                  'HTTP response body:\n' +
                                  six.text_type(response))
@base.ReleaseTracks(base.ReleaseTrack.GA)
@base.UniverseCompatible
class RawPredict(base.Command):
  """Run Vertex AI online raw prediction.
  `{command}` sends a raw prediction request to a Vertex AI endpoint. The
  request can be given on the command line or read from a file or stdin.
  ## EXAMPLES
  To predict against an endpoint ``123'' under project ``example'' in region
  ``us-central1'', reading the request from the command line, run:
    $ {command} 123 --project=example --region=us-central1 --request='{
      "instances": [
        { "values": [1, 2, 3, 4], "key": 1 },
        { "values": [5, 6, 7, 8], "key": 2 }
      ]
    }'
  If the request body was in the file ``input.json'', run:
    $ {command} 123 --project=example --region=us-central1 --request=@input.json
  To send the image file ``image.jpeg'' and set the *content type*, run:
    $ {command} 123 --project=example --region=us-central1
      --http-headers=Content-Type=image/jpeg --request=@image.jpeg
  """
  @staticmethod
  def Args(parser):
    # Endpoint resource arg + --http-headers and --request flags.
    _AddArgs(parser)
  def Run(self, args):
    # Delegate to the shared implementation with the GA (v1) API.
    return _Run(args, constants.GA_VERSION)
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.ALPHA)
@base.UniverseCompatible
class RawPredictBeta(RawPredict):
  """Run Vertex AI online raw prediction.
  `{command}` sends a raw prediction request to a Vertex AI endpoint. The
  request can be given on the command line or read from a file or stdin.
  ## EXAMPLES
  To predict against an endpoint ``123'' under project ``example'' in region
  ``us-central1'', reading the request from the command line, run:
    $ {command} 123 --project=example --region=us-central1 --request='{
      "instances": [
        { "values": [1, 2, 3, 4], "key": 1 },
        { "values": [5, 6, 7, 8], "key": 2 }
      ]
    }'
  If the request body was in the file ``input.json'', run:
    $ {command} 123 --project=example --region=us-central1 --request=@input.json
  To send the image file ``image.jpeg'' and set the *content type*, run:
    $ {command} 123 --project=example --region=us-central1
      --http-headers=Content-Type=image/jpeg --request=@image.jpeg
  """
  # Args is inherited from RawPredict; only the API version differs.
  def Run(self, args):
    # Both beta and alpha tracks use the v1beta1 API surface.
    return _Run(args, constants.BETA_VERSION)

View File

@@ -0,0 +1,105 @@
# -*- coding: utf-8 -*- #
# Copyright 2024 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Vertex AI endpoints stream direct predict command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.ai.endpoints import prediction_streamer
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ai import constants
from googlecloudsdk.command_lib.ai import endpoint_util
from googlecloudsdk.command_lib.ai import endpoints_util
from googlecloudsdk.command_lib.ai import flags
from googlecloudsdk.command_lib.ai import region_util
def _AddArgs(parser):
  """Adds the endpoint resource argument and the direct-predict inputs flag."""
  flags.AddEndpointResourceArg(
      parser,
      'to do online stream direct prediction',
      prompt_func=region_util.PromptForOpRegion)
  flags.AddDirectPredictInputsArg(parser)
def _Run(args, version):
  """Runs Vertex AI online stream direct prediction."""
  endpoint_ref = args.CONCEPTS.endpoint.Parse()
  args.region = endpoint_ref.AsDict()['locationsId']
  with endpoint_util.AiplatformEndpointOverrides(
      version, region=args.region, is_prediction=True):
    payload = endpoints_util.ReadInputsFromArgs(args.json_request)
    # Map the track's constant to the REST API version string.
    api_version = 'v1' if version == constants.GA_VERSION else 'v1beta1'
    streamer = prediction_streamer.PredictionStreamer(api_version)
    if not args.IsSpecified('format'):
      # Streamed responses have no table-friendly shape; default to JSON.
      args.format = 'json'
    return streamer.StreamDirectPredict(
        endpoint=endpoint_ref.RelativeName(),
        inputs=payload['inputs'],
        parameters=payload.get('parameters', {}),
    )
@base.UniverseCompatible
@base.ReleaseTracks(base.ReleaseTrack.GA)
class StreamDirectPredictGa(base.Command):
  """Run Vertex AI online stream direct prediction.
  `{command}` sends a stream direct prediction request to Vertex AI endpoint
  for the given inputs. The request limit is 10MB.
  ## EXAMPLES
  To stream direct predict against an endpoint ``123'' under project ``example''
  in region ``us-central1'', run:
    $ {command} 123 --project=example --region=us-central1
      --json-request=input.json
  """
  @staticmethod
  def Args(parser):
    # Endpoint resource arg + --json-request inputs flag.
    _AddArgs(parser)
  def Run(self, args):
    # Delegate to the shared implementation with the GA (v1) API.
    return _Run(args, constants.GA_VERSION)
@base.UniverseCompatible
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.ALPHA)
class StreamDirectPredictBeta(StreamDirectPredictGa):
  """Run Vertex AI online stream direct prediction.
  `{command}` sends a stream direct prediction request to Vertex AI endpoint
  for the given inputs. The request limit is 10MB.
  ## EXAMPLES
  To stream direct predict against an endpoint ``123'' under project ``example''
  in region ``us-central1'', run:
    $ {command} 123 --project=example --region=us-central1
      --json-request=input.json
  """
  # Args is inherited from StreamDirectPredictGa.
  def Run(self, args):
    # Both beta and alpha tracks use the v1beta1 API surface.
    return _Run(args, constants.BETA_VERSION)

View File

@@ -0,0 +1,109 @@
# -*- coding: utf-8 -*- #
# Copyright 2024 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Vertex AI endpoints stream direct raw predict command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import base64
from googlecloudsdk.api_lib.ai.endpoints import prediction_streamer
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ai import constants
from googlecloudsdk.command_lib.ai import endpoint_util
from googlecloudsdk.command_lib.ai import endpoints_util
from googlecloudsdk.command_lib.ai import flags
from googlecloudsdk.command_lib.ai import region_util
def _AddArgs(parser):
  """Adds the endpoint resource argument and the direct-raw-predict input flag."""
  flags.AddEndpointResourceArg(
      parser,
      'to do online stream direct raw prediction',
      prompt_func=region_util.PromptForOpRegion)
  flags.AddDirectRawPredictInputArg(parser)
def _Run(args, version):
  """Yields decoded responses from a stream direct raw prediction."""
  endpoint_ref = args.CONCEPTS.endpoint.Parse()
  args.region = endpoint_ref.AsDict()['locationsId']
  with endpoint_util.AiplatformEndpointOverrides(
      version, region=args.region, is_prediction=True):
    payload = endpoints_util.ReadInputFromArgs(args.json_request)
    # Map the track's constant to the REST API version string.
    api_version = 'v1' if version == constants.GA_VERSION else 'v1beta1'
    streamer = prediction_streamer.PredictionStreamer(api_version)
    if not args.IsSpecified('format'):
      # Streamed responses have no table-friendly shape; default to JSON.
      args.format = 'json'
    responses = streamer.StreamDirectRawPredict(
        endpoint=endpoint_ref.RelativeName(),
        method_name=payload['method_name'],
        input=payload['input'],
    )
    for resp in responses:
      # The wire format base64-encodes raw output bytes; decode before
      # handing the message to the display layer.
      resp.output = base64.b64decode(resp.output)
      yield resp
@base.UniverseCompatible
@base.ReleaseTracks(base.ReleaseTrack.GA)
class StreamDirectRawPredictGa(base.Command):
  """Run Vertex AI online stream direct raw prediction.
  `{command}` sends a stream direct raw prediction request to Vertex AI
  endpoint for the given input. The request limit is 10MB.
  ## EXAMPLES
  To stream direct predict against an endpoint ``123'' under project ``example''
  in region ``us-central1'', run:
    $ {command} 123 --project=example --region=us-central1
      --json-request=input.json
  """
  @staticmethod
  def Args(parser):
    # Endpoint resource arg + --json-request input flag.
    _AddArgs(parser)
  def Run(self, args):
    # Delegate to the shared (generator) implementation with the GA API.
    return _Run(args, constants.GA_VERSION)
@base.UniverseCompatible
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.ALPHA)
class StreamDirectRawPredictBeta(StreamDirectRawPredictGa):
  """Run Vertex AI online stream direct raw prediction.
  `{command}` sends a stream direct raw prediction request to Vertex AI
  endpoint for the given input. The request limit is 10MB.
  ## EXAMPLES
  To stream direct raw predict against an endpoint ``123'' under project
  ``example'' in region ``us-central1'', run:
    $ {command} 123 --project=example --region=us-central1
      --json-request=input.json
  """
  # Args is inherited from StreamDirectRawPredictGa.
  def Run(self, args):
    # Both beta and alpha tracks use the v1beta1 API surface.
    return _Run(args, constants.BETA_VERSION)

View File

@@ -0,0 +1,147 @@
# -*- coding: utf-8 -*- #
# Copyright 2024 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Vertex AI endpoints stream-raw-predict command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import json
import sys
from googlecloudsdk.api_lib.ai.endpoints import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ai import constants
from googlecloudsdk.command_lib.ai import endpoint_util
from googlecloudsdk.command_lib.ai import flags
from googlecloudsdk.command_lib.ai import region_util
from googlecloudsdk.core import exceptions as core_exceptions
from googlecloudsdk.core.console import console_io
import six
def _AddArgs(parser):
  """Adds the endpoint resource argument plus raw-predict header/body flags."""
  flags.AddEndpointResourceArg(
      parser,
      'to do online stream raw prediction',
      prompt_func=region_util.PromptForOpRegion)
  flags.GetRawPredictHeadersArg().AddToParser(parser)
  flags.GetRawPredictRequestArg().AddToParser(parser)
def _Run(args, version):
  """Runs Vertex AI online stream raw prediction.

  Yields parsed JSON objects from the stream when an explicit --format was
  given; otherwise writes each raw response chunk straight to stdout.
  """
  endpoint_ref = args.CONCEPTS.endpoint.Parse()
  # Downstream helpers read the region off args, so mirror it there.
  args.region = endpoint_ref.AsDict()['locationsId']
  with endpoint_util.AiplatformEndpointOverrides(version, region=args.region):
    # '@path' means read the request body from a file; otherwise the flag
    # value itself is the body.
    if args.request.startswith('@'):
      request = console_io.ReadFromFileOrStdin(args.request[1:], binary=True)
    else:
      request = args.request.encode('utf-8')
    endpoints_client = client.EndpointsClient(version=version)
    for response in endpoints_client.StreamRawPredict(
        endpoint_ref, args.http_headers, request
    ):
      # Workaround since gcloud only supports protobufs as JSON objects. Since
      # stream raw predict can return anything, write stream raw bytes to
      # stdout.
      if not args.IsSpecified('format'):
        sys.stdout.buffer.write(response)
        continue
      # If user asked for formatting, assume it's a JSON object.
      try:
        yield json.loads(response.decode('utf-8'))
      except ValueError:
        raise core_exceptions.Error(
            'No JSON object could be decoded from the HTTP response body:\n'
            + six.text_type(response)
        )
@base.UniverseCompatible
@base.ReleaseTracks(base.ReleaseTrack.GA)
class StreamRawPredict(base.Command):
  """Run Vertex AI online stream raw prediction.
  `{command}` sends a stream raw prediction request to a Vertex AI endpoint. The
  request can be given on the command line or read from a file or stdin.
  ## EXAMPLES
  To predict against an endpoint ``123'' under project ``example'' in region
  ``us-central1'', reading the request from the command line, run:
    $ {command} 123 --project=example --region=us-central1 --request='{
      "instances": [
        { "values": [1, 2, 3, 4], "key": 1 },
        { "values": [5, 6, 7, 8], "key": 2 }
      ]
    }'
  If the request body was in the file ``input.json'', run:
    $ {command} 123 --project=example --region=us-central1 --request=@input.json
  To send the image file ``image.jpeg'' and set the *content type*, run:
    $ {command} 123 --project=example --region=us-central1
      --http-headers=Content-Type=image/jpeg --request=@image.jpeg
  """
  @staticmethod
  def Args(parser):
    # Endpoint resource arg + --http-headers and --request flags.
    _AddArgs(parser)
  def Run(self, args):
    # Delegate to the shared (generator) implementation with the GA API.
    return _Run(args, constants.GA_VERSION)
@base.UniverseCompatible
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.ALPHA)
class StreamRawPredictBeta(StreamRawPredict):
  """Run Vertex AI online stream raw prediction.
  `{command}` sends a stream raw prediction request to a Vertex AI endpoint. The
  request can be given on the command line or read from a file or stdin.
  ## EXAMPLES
  To predict against an endpoint ``123'' under project ``example'' in region
  ``us-central1'', reading the request from the command line, run:
    $ {command} 123 --project=example --region=us-central1 --request='{
      "instances": [
        { "values": [1, 2, 3, 4], "key": 1 },
        { "values": [5, 6, 7, 8], "key": 2 }
      ]
    }'
  If the request body was in the file ``input.json'', run:
    $ {command} 123 --project=example --region=us-central1 --request=@input.json
  To send the image file ``image.jpeg'' and set the *content type*, run:
    $ {command} 123 --project=example --region=us-central1
      --http-headers=Content-Type=image/jpeg --request=@image.jpeg
  """
  # Args is inherited from StreamRawPredict.
  def Run(self, args):
    # Both beta and alpha tracks use the v1beta1 API surface.
    return _Run(args, constants.BETA_VERSION)

View File

@@ -0,0 +1,99 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Vertex AI endpoints undeploy-model command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.ai import operations
from googlecloudsdk.api_lib.ai.endpoints import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ai import constants
from googlecloudsdk.command_lib.ai import endpoint_util
from googlecloudsdk.command_lib.ai import endpoints_util
from googlecloudsdk.command_lib.ai import flags
from googlecloudsdk.command_lib.ai import operations_util
from googlecloudsdk.command_lib.ai import region_util
def _AddArgs(parser):
  """Adds the endpoint resource argument plus undeploy-related flags."""
  flags.AddEndpointResourceArg(
      parser, 'to undeploy a model from',
      prompt_func=region_util.PromptForOpRegion)
  flags.GetDeployedModelId().AddToParser(parser)
  flags.GetTrafficSplitArg().AddToParser(parser)
def _Run(args, version):
  """Undeploy a model from an existing Vertex AI endpoint."""
  endpoint_ref = args.CONCEPTS.endpoint.Parse()
  # Downstream helpers read the region off args, so mirror it there.
  args.region = endpoint_ref.AsDict()['locationsId']
  with endpoint_util.AiplatformEndpointOverrides(version, region=args.region):
    endpoints_client = client.EndpointsClient(version=version)
    operation_client = operations.OperationsClient()
    # GA and pre-GA surfaces use different message types, hence two methods.
    if version == constants.GA_VERSION:
      op = endpoints_client.UndeployModel(
          endpoint_ref,
          args.deployed_model_id,
          traffic_split=args.traffic_split)
    else:
      op = endpoints_client.UndeployModelBeta(
          endpoint_ref,
          args.deployed_model_id,
          traffic_split=args.traffic_split)
    # Wait for the long-running undeploy operation (unless async behavior
    # applies) and return its final state.
    return operations_util.WaitForOpMaybe(
        operation_client, op, endpoints_util.ParseOperation(op.name))
@base.ReleaseTracks(base.ReleaseTrack.GA)
@base.UniverseCompatible
class UndeployModelGa(base.Command):
  """Undeploy a model from an existing Vertex AI endpoint.
  ## EXAMPLES
  To undeploy a model ``456'' from an endpoint ``123'' under project ``example''
  in region ``us-central1'', run:
    $ {command} 123 --project=example --region=us-central1
      --deployed-model-id=456
  """
  @staticmethod
  def Args(parser):
    # Endpoint resource arg + --deployed-model-id / --traffic-split flags.
    _AddArgs(parser)
  def Run(self, args):
    # NOTE(review): _Run's operation result is discarded here, unlike the
    # other endpoint commands which return it — presumably deliberate so the
    # command prints nothing beyond operation logs; confirm before changing.
    _Run(args, constants.GA_VERSION)
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.ALPHA)
@base.UniverseCompatible
class UndeployModelBeta(UndeployModelGa):
  """Undeploy a model from an existing Vertex AI endpoint.
  ## EXAMPLES
  To undeploy a model ``456'' from an endpoint ``123'' under project ``example''
  in region ``us-central1'', run:
    $ {command} 123 --project=example --region=us-central1
      --deployed-model-id=456
  """
  # Args is inherited from UndeployModelGa.
  def Run(self, args):
    # Both beta and alpha tracks use the v1beta1 API surface; the operation
    # result is discarded to match the GA track's behavior.
    _Run(args, constants.BETA_VERSION)

View File

@@ -0,0 +1,137 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Vertex AI endpoints update command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.ai.endpoints import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ai import constants
from googlecloudsdk.command_lib.ai import endpoint_util
from googlecloudsdk.command_lib.ai import errors
from googlecloudsdk.command_lib.ai import flags
from googlecloudsdk.command_lib.ai import region_util
from googlecloudsdk.command_lib.ai import validation
from googlecloudsdk.command_lib.util.args import labels_util
from googlecloudsdk.core import log
def _AddArgs(parser):
  """Adds the endpoint resource argument and all update-related flags."""
  flags.AddEndpointResourceArg(
      parser, 'to update', prompt_func=region_util.PromptForOpRegion)
  flags.GetDisplayNameArg('endpoint', required=False).AddToParser(parser)
  flags.GetDescriptionArg('endpoint').AddToParser(parser)
  flags.AddTrafficSplitGroupArgs(parser)
  flags.AddRequestResponseLoggingConfigUpdateGroupArgs(parser)
  labels_util.AddUpdateLabelsFlags(parser)
def _Run(args, version):
  """Update an existing Vertex AI endpoint."""
  validation.ValidateDisplayName(args.display_name)
  endpoint_ref = args.CONCEPTS.endpoint.Parse()
  # Downstream helpers read the region off args, so mirror it there.
  args.region = endpoint_ref.AsDict()['locationsId']
  with endpoint_util.AiplatformEndpointOverrides(version, region=args.region):
    endpoints_client = client.EndpointsClient(version=version)
    # Fetched lazily by ProcessUpdateArgsLazy: the GET only happens when
    # label-update flags were actually supplied.
    def GetLabels():
      return endpoints_client.Get(endpoint_ref).labels
    try:
      # GA and beta tracks use different generated message classes, hence
      # the parallel Patch / PatchBeta calls.
      if version == constants.GA_VERSION:
        op = endpoints_client.Patch(
            endpoint_ref,
            labels_util.ProcessUpdateArgsLazy(
                args, endpoints_client.messages.GoogleCloudAiplatformV1Endpoint
                .LabelsValue, GetLabels),
            display_name=args.display_name,
            description=args.description,
            traffic_split=args.traffic_split,
            clear_traffic_split=args.clear_traffic_split,
            request_response_logging_table=args.request_response_logging_table,
            request_response_logging_rate=args.request_response_logging_rate,
            disable_request_response_logging=args
            .disable_request_response_logging)
      else:
        op = endpoints_client.PatchBeta(
            endpoint_ref,
            labels_util.ProcessUpdateArgsLazy(
                args, endpoints_client.messages
                .GoogleCloudAiplatformV1beta1Endpoint.LabelsValue, GetLabels),
            display_name=args.display_name,
            description=args.description,
            traffic_split=args.traffic_split,
            clear_traffic_split=args.clear_traffic_split,
            request_response_logging_table=args.request_response_logging_table,
            request_response_logging_rate=args.request_response_logging_rate,
            disable_request_response_logging=args
            .disable_request_response_logging)
    except errors.NoFieldsSpecifiedError:
      available_update_args = [
          'display_name', 'traffic_split', 'clear_traffic_split',
          'update_labels', 'clear_labels', 'remove_labels', 'description',
          'request_response_logging_table', 'request_response_logging_rate',
          'disable_request_response_logging'
      ]
      # If the user supplied none of the update flags, propagate the error;
      # if flags were given but produced no effective change (e.g. labels
      # already match), report a no-op instead of failing.
      if not any(args.IsSpecified(arg) for arg in available_update_args):
        raise
      log.status.Print('No update to perform.')
      return None
    else:
      log.UpdatedResource(op.name, kind='Vertex AI endpoint')
      return op
@base.ReleaseTracks(base.ReleaseTrack.GA)
@base.UniverseCompatible
class UpdateGa(base.UpdateCommand):
  """Update an existing Vertex AI endpoint.
  ## EXAMPLES
  To update an endpoint ``123'' under project ``example'' in region
  ``us-central1'', run:
    $ {command} 123 --project=example --region=us-central1
      --display-name=new_name
  """
  @staticmethod
  def Args(parser):
    # Endpoint resource arg + display-name/description/traffic/logging/labels.
    _AddArgs(parser)
  def Run(self, args):
    # Delegate to the shared implementation with the GA (v1) API.
    return _Run(args, constants.GA_VERSION)
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.ALPHA)
@base.UniverseCompatible
class UpdateBeta(UpdateGa):
  """Update an existing Vertex AI endpoint.
  ## EXAMPLES
  To update an endpoint ``123'' under project ``example'' in region
  ``us-central1'', run:
    $ {command} 123 --project=example --region=us-central1
      --display-name=new_name
  """
  # Args is inherited from UpdateGa.
  def Run(self, args):
    # Both beta and alpha tracks use the v1beta1 API surface.
    return _Run(args, constants.BETA_VERSION)