apiVersion: serving.knative.dev/v1 # Current version of the Knative Serving API
kind: Service
metadata:
  name: helloworld # The name of the app
spec:
  template:
    metadata:
      annotations:
        autoscaling.knative.dev/minScale: "0" # Allow scale to zero
        autoscaling.knative.dev/maxScale: "10" # Maximum number of Pods to auto-scale up to
    spec:
      # containerConcurrency defines how many active requests are sent to a single
      # backend Pod at a time. This setting is important because it affects how well
      # requests are load-balanced across Pods. For a standard, non-blocking web
      # application it can usually be fairly high, e.g. 100. For GPU inference,
      # however, it should usually be set to 1: GPU inference processes only one
      # request at a time, and the queue should build up centrally in the load
      # balancer rather than locally in the Pod.
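      # Per the guidance above, cap each Pod at one in-flight request (the value 1
      # here assumes the GPU-inference case; a plain web app could use e.g. 100):
      containerConcurrency: 1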
      containers:
        - image: gcr.io/knative-samples/helloworld-go # The URL to the image of the app
          env:
            - name: TARGET # The environment variable printed out by the sample app
              value: "Go Sample v1" # Example value used by the Knative sample
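# To deploy this Service (assuming Knative Serving is installed and kubectl points
# at the target cluster), apply the manifest and look up the URL Knative assigns:
#   kubectl apply -f service.yaml   # filename assumed
#   kubectl get ksvc helloworld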