90 lines
3.5 KiB
YAML
90 lines
3.5 KiB
YAML
# TODO(b/414798340): Modify this when PresetMetadata design is finalized.
|
|
# TODO(b/436350694): Add boolean for presets with ingress container.
|
|
# TODO(b/446719563): Use type and name instead of name and display_name. Use proper proto enums instead for type and data_type.
|
|
# Catalog of presets. Every entry carries a name, display name, version,
# category, description, the resource kinds it supports, a map of
# config_values, and example gcloud usage; some entries also declare
# parameters.
presets:
# Preset with a 4 vCPU / 16GiB / single NVIDIA L4 GPU configuration.
- name: ai-inference
  display_name: 'AI Inference'
  version: '0.0.1'
  category: CATEGORY_QUICKSTART
  description: >-
    Create a service for running inference on AI models.
  supported_resources:
  - SERVICE
  # Keys contain spaces and read as display labels (presumably shown to the
  # user as-is -- confirm against the consumer).
  config_values:
    CPU limit: '4 vCPUs'
    Memory limit: '16GiB'
    GPU: '1 NVIDIA L4 (no zonal redundancy)'
    Billing: 'instance-based'
  example_gcloud_usage: 'gcloud alpha run deploy <service-name> --preset=ai-inference'
|
|
# Preset that deploys the ollama/ollama:latest image with an L4 GPU and a
# `bucket` parameter for model storage.
- name: ollama
  display_name: Ollama
  version: 0.0.1
  category: CATEGORY_QUICKSTART
  description: >-
    Inference server for open LLMs, using GPUs and Cloud Storage. Deploys the
    latest Ollama container, configured for Cloud Run with L4 GPUs and a Cloud
    Storage bucket for model storage.
  supported_resources:
  - SERVICE
  parameters:
  # No `required` key is set on this parameter, and the first usage example
  # below omits it.
  - name: bucket
    label: GCS Bucket Name
    description: >-
      Connect your service to a Cloud Storage bucket. Models downloaded by
      Ollama will be stored in this bucket, improving overall performance and
      minimizing cold start times.
    type: GCS_BUCKET
    data_type: DATA_TYPE_STRING
  # Keys contain spaces and read as display labels. "Memory limit" is spelled
  # the same way as in the ai-inference preset.
  config_values:
    Container Image: ollama/ollama:latest
    CPU limit: 4 vCPUs
    Memory limit: 16GiB
    GPU: 1 NVIDIA L4 (no zonal redundancy)
    Billing: instance-based
  # Unlike the other presets, this is a list: one invocation without the
  # bucket parameter and one with it.
  example_gcloud_usage:
  - gcloud alpha run deploy <service-name> --preset=ollama
  - gcloud alpha run deploy <service-name> --preset=ollama:bucket=<bucket-name>
|
|
# Preset with internal ingress and required authentication.
- name: private-service
  display_name: 'Private Service'
  version: '0.0.1'
  category: CATEGORY_QUICKSTART
  description: >-
    Create a private, internal service with access control enforced based on
    identity and IAM roles.
  supported_resources:
  - SERVICE
  config_values:
    Ingress: 'internal'
    Authentication: 'required'
  example_gcloud_usage: 'gcloud alpha run deploy <service-name> --preset=private-service'
|
|
# Preset with ingress open to all traffic and public (unauthenticated) access.
- name: public-service
  display_name: 'Public Service'
  version: '0.0.1'
  category: CATEGORY_QUICKSTART
  description: >-
    Publicly accessible endpoint, exposing your service to anyone on the
    internet, without any authentication.
  supported_resources:
  - SERVICE
  config_values:
    Ingress: 'all'
    Authentication: 'allow public access'
  example_gcloud_usage: 'gcloud alpha run deploy <service-name> --preset=public-service'
|
|
# Preset that sets container concurrency to 1.
- name: single-concurrency
  display_name: 'Single Concurrency'
  version: '0.0.1'
  category: CATEGORY_QUICKSTART
  description: >-
    Create a service where requests are handled strictly one at a time.
  supported_resources:
  - SERVICE
  config_values:
    # Left unquoted on purpose: YAML loaders read this value as an integer,
    # whereas the config_values of the other presets are strings.
    Container Concurrency: 1
  example_gcloud_usage: 'gcloud alpha run deploy <service-name> --preset=single-concurrency'
|
|
# Preset that uses the nginx:stable image together with a mandatory `bucket`
# parameter naming the Cloud Storage bucket that holds the static assets.
- name: static-website
  display_name: 'Static Website'
  version: '0.0.1'
  category: CATEGORY_QUICKSTART
  description: >-
    Serve your static website assets from a Cloud Storage bucket with an NGINX
    server behind a Cloud Run endpoint, allowing you to use IAP, a load
    balancer or private networking.
  supported_resources:
  - SERVICE
  parameters:
  - name: bucket
    label: 'Storage Bucket Name'
    description: >-
      Unique bucket name (and optional folder path) with static assets.
    type: GCS_BUCKET
    # Marked required; the usage example below always passes bucket=.
    required: true
    data_type: DATA_TYPE_STRING
  config_values:
    Container Image: 'nginx:stable'
    # Placeholder text, not a literal bucket name.
    Storage Bucket: '<bucket-name>'
  example_gcloud_usage: >-
    gcloud alpha run deploy <service-name>
    --preset=static-website:bucket=<bucket-name>
|