> ## Documentation Index
> Fetch the complete documentation index at: https://docs.coreweave.com/llms.txt
> Use this file to discover all available pages before exploring further.

# Get deployment

> Fetch a single deployment by its ID.

<Info>
  * The API server is `https://api.coreweave.com`.
  * Replace `{API_ACCESS_TOKEN}` with your [CoreWeave API access token](/security/authn-authz/manage-api-access-tokens).
  * For required permissions, see [IAM Access Policies](/security/iam/access-policies).
</Info>

Replace `{id}` with the deployment ID returned by the List deployments or
Create deployment endpoint.

```bash title="Example request" theme={"system"}
# Replace {id} with the deployment ID and {API_ACCESS_TOKEN} with your token.
curl -X GET https://api.coreweave.com/v1alpha1/inference/deployments/{id} \
       -H "Content-Type: application/json" \
       -H "Authorization: Bearer {API_ACCESS_TOKEN}"
```


## OpenAPI

````yaml /openapi/inference/openapi.yaml get /v1alpha1/inference/deployments/{id}
# Document header: spec version, API metadata, server, default auth, and tags.
openapi: '3.0.3'
info:
  title: CoreWeave Inference API
  description: >-
    The CoreWeave Inference API provides programmatic control
    over inference gateways, model deployments, and capacity claims.
  version: '0.0.1'
servers:
  - description: CoreWeave production API.
    url: https://api.coreweave.com
# Applies to every operation unless an operation overrides it.
security:
  - bearerAuth: []
tags:
  - name: CapacityClaimService
    description: >-
      Endpoints for creating, listing, getting, updating, and
      deleting CapacityClaim reservations of GPU hardware for
      inference deployments.
  - name: DeploymentService
    description: >-
      Endpoints for creating, listing, getting, updating, and
      deleting model deployments. Each deployment associates a model
      with one or more gateways and configures runtime, resources,
      autoscaling, and traffic.
  - name: GatewayService
    description: >-
      Endpoints for creating, listing, getting, updating, and
      deleting inference gateways. Gateways provide authentication,
      request routing, load balancing, and traffic splitting for one
      or more deployments.
paths:
  # Only the GET operation is defined for this path here.
  /v1alpha1/inference/deployments/{id}:
    get:
      operationId: DeploymentService_GetDeployment
      summary: Get deployment
      description: Retrieves a single deployment by ID.
      tags:
        - DeploymentService
      parameters:
        - in: path
          name: id
          required: true
          schema:
            type: string
          description: The ID of the deployment to get
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GetDeploymentResponse'
        # Every status code other than 200 uses the shared Status error body.
        default:
          description: Default error response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Status'
components:
  schemas:
    # Response envelope holding a single `deployment` member.
    GetDeploymentResponse:
      type: object
      description: Response for GetDeployment
      properties:
        deployment:
          readOnly: true
          description: The deployment
          # allOf wrapper lets sibling keywords sit next to the $ref.
          allOf:
            - $ref: '#/components/schemas/Deployment'
    # Error body with three members: numeric `code`, `message`, and `details`.
    Status:
      description: >-
        Standard error response. `code` is a
        [`google.rpc.Code`](https://cloud.google.com/apis/design/errors#error_codes);
        `message` is human-readable English; `details` carries machine-readable
        error details when present.
      type: object
      properties:
        code:
          # The protobuf-style `[name][name]` reference has no link target in
          # Markdown, so it is written as a regular inline link instead.
          description: >-
            The status code, which should be an enum value of
            [`google.rpc.Code`](https://cloud.google.com/apis/design/errors#error_codes).
          type: integer
          format: int32
        message:
          description: >-
            A developer-facing error message, which should be in English. Any
            user-facing error message should be localized and sent in the
            `details` field, or localized by the client.
          type: string
        details:
          description: >-
            A list of messages that carry the error details. There is a common
            set of message types for APIs to use.
          type: array
          items:
            $ref: '#/components/schemas/GoogleProtobufAny'
    # A deployment is split into `spec` (configuration) and `status` (state).
    Deployment:
      type: object
      description: Deployment object with specification and status fields
      properties:
        spec:
          readOnly: true
          description: The specification of the deployment
          allOf:
            - $ref: '#/components/schemas/DeploymentSpec'
        status:
          readOnly: true
          description: The status of the deployment
          allOf:
            - $ref: '#/components/schemas/DeploymentStatus'
    # Open-ended object: only `@type` is declared, any other member is allowed.
    GoogleProtobufAny:
      type: object
      description: >-
        Contains an arbitrary serialized message along with a @type
        that describes the type of the serialized message.
      additionalProperties: true
      properties:
        '@type':
          type: string
          description: The type of the serialized message.
    # Configuration half of a Deployment; every property is marked readOnly.
    DeploymentSpec:
      type: object
      description: DeploymentSpec object with configuration fields
      properties:
        id:
          type: string
          readOnly: true
          description: The ID of the deployment
        name:
          type: string
          readOnly: true
          description: The name of the deployment
        gatewayIds:
          type: array
          items:
            type: string
          readOnly: true
          description: >-
            The IDs of the gateways the deployment is associated with.
            A deployment may be attached to multiple gateways; traffic
            from each gateway is routed by that gateway's strategy.
        runtime:
          readOnly: true
          description: >-
            Runtime selection and configuration. The available
            engines, versions, and per-engine config keys are returned
            by `GET /v1alpha1/inference/deployments/parameters`.
          allOf:
            - $ref: '#/components/schemas/DeploymentRuntime'
        resources:
          readOnly: true
          description: >-
            Resource configuration for the deployment. CPU and RAM
            are automatically assigned based on `gpuCount` for the
            chosen `instanceType`.
          allOf:
            - $ref: '#/components/schemas/DeploymentResources'
        model:
          readOnly: true
          description: >-
            The model configuration. The model is loaded from a path
            in a CoreWeave AI Object Storage (CAIOS) bucket.
          allOf:
            - $ref: '#/components/schemas/DeploymentModel'
        autoscaling:
          readOnly: true
          description: >-
            Autoscaling configuration. CoreWeave manages scaling
            within the configured `min` and `max` bounds;
            scale-to-zero is not supported.
          allOf:
            - $ref: '#/components/schemas/DeploymentAutoscaling'
        traffic:
          readOnly: true
          description: >-
            Traffic configuration for the deployment. When multiple
            deployments share a model name on the same gateway,
            `traffic.weight` controls the share of traffic each
            deployment receives.
          allOf:
            - $ref: '#/components/schemas/DeploymentTraffic'
        organizationId:
          type: string
          readOnly: true
          description: The organization ID that owns the deployment.
        disabled:
          type: boolean
          readOnly: true
          description: >-
            When `true`, the deployment does not receive traffic and
            is not scaled.
    # Observed-state half of a Deployment: timestamps, conditions, and a
    # summary `status` enum.
    DeploymentStatus:
      type: object
      description: DeploymentStatus object for status fields
      properties:
        createdAt:
          type: string
          format: date-time
          readOnly: true
          description: The time at which the deployment was created.
        updatedAt:
          type: string
          format: date-time
          readOnly: true
          description: The time at which the deployment was last updated.
        conditions:
          type: array
          items:
            $ref: '#/components/schemas/Condition'
          readOnly: true
          description: List of conditions representing detailed status information
        status:
          type: string
          readOnly: true
          enum:
            - STATUS_CREATING
            - STATUS_READY
            - STATUS_UPDATING
            - STATUS_DELETING
            - STATUS_ERROR
            - STATUS_FAILED
          description: >-
            The overall status of the deployment. See the
            [Inference API overview](https://docs.coreweave.com/products/inference/reference/api-overview#status-values)
            for the meaning of each value.
    # Engine choice plus optional version and string-to-string engine config.
    DeploymentRuntime:
      type: object
      description: Runtime selection and configuration for deployments
      required:
        - engine
      properties:
        engine:
          type: string
          description: >-
            The inference engine to use (for example, `vllm`). The
            available engines are returned by
            `GET /v1alpha1/inference/deployments/parameters`.
        version:
          type: string
          description: >-
            The version of the engine. Defaults to `latest` when
            unset. Must be one of the engine's available versions
            returned by the parameters endpoint.
        engineConfig:
          type: object
          # Free-form map: keys are not enumerated here, values are strings.
          additionalProperties:
            type: string
          description: >-
            Engine-specific configuration arguments. The allowed keys
            per engine are returned under
            `runtimeParameters.runtimeConfigOptions` by the parameters
            endpoint.
    # Hardware request: both the instance type and the GPU count are required.
    DeploymentResources:
      type: object
      description: Resource configuration for the deployment
      required:
        - instanceType
        - gpuCount
      properties:
        instanceType:
          type: string
          description: >-
            The instance type to use for the deployment. The
            available values are returned by
            `GET /v1alpha1/inference/deployments/parameters`.
        gpuCount:
          type: integer
          format: uint32
          description: >-
            Number of GPUs needed per instance. CPU and RAM are
            automatically assigned based on the chosen GPU count.
    # Where the model lives (bucket + path) and the name clients request it by.
    DeploymentModel:
      type: object
      description: Configuration of the model
      required:
        - name
        - bucket
        - path
      properties:
        name:
          type: string
          description: >-
            The model name used in inference requests (and returned
            by the gateway's `/models` endpoint). When multiple
            deployments on the same gateway share a name, traffic is
            split between them by `traffic.weight`.
        bucket:
          type: string
          description: >-
            The CoreWeave AI Object Storage (CAIOS) bucket the model
            is stored in.
        path:
          type: string
          description: The path within `bucket` to the model and its configuration files.
    # Replica bounds and scaling hints for a deployment.
    # `format: uint32` is not a format defined by OpenAPI 3.0, so the numeric
    # bounds stated in each description are also expressed with
    # `minimum`/`maximum`.
    DeploymentAutoscaling:
      description: Autoscaling configuration for the deployment
      type: object
      properties:
        min:
          description: >-
            The minimum number of replicas. Must be at least `1`; scale-to-zero
            is not supported.
          type: integer
          format: uint32
          minimum: 1
        max:
          description: >-
            The maximum number of replicas. Must be greater than or equal to
            `min`. Set equal to `min` to disable autoscaling.
          type: integer
          format: uint32
          # `max >= min` cannot be expressed with schema keywords; the lower
          # bound of 1 follows from `min` being at least 1.
          minimum: 1
        priority:
          description: >-
            Scaling priority relative to other deployments, from `0` to `1000`.
            Higher values receive scaling preference during resource contention.
          type: integer
          format: uint32
          minimum: 0
          maximum: 1000
        concurrency:
          description: >-
            Target concurrent requests per replica. Lower values reduce latency;
            higher values increase throughput. Must be at least `1`.
          type: integer
          format: uint32
          minimum: 1
        capacityClasses:
          description: >-
            The capacity classes the autoscaler may use for this deployment. Set
            `CAPACITY_CLASS_RESERVED` to schedule replicas onto capacity
            reserved by a CapacityClaim; set `CAPACITY_CLASS_ON_DEMAND` to use
            shared on-demand capacity.
          type: array
          items:
            type: string
            enum:
              - CAPACITY_CLASS_RESERVED
              - CAPACITY_CLASS_ON_DEMAND
      required:
        - min
        - max
    # Single knob: the relative traffic weight of this deployment.
    DeploymentTraffic:
      type: object
      description: Configuration for traffic control to the deployment
      properties:
        weight:
          type: integer
          format: uint32
          description: >-
            The relative weight of traffic routed to this deployment
            compared to other deployments with the same model name on
            the same gateway. Weights are normalized into percentages.
    # One entry of a `conditions` list; every property is marked readOnly.
    Condition:
      type: object
      description: Condition represents a detailed status condition for resources
      properties:
        type:
          type: string
          readOnly: true
          description: Type of condition
        lastUpdateTime:
          type: string
          format: date-time
          readOnly: true
          description: Last time the condition was updated
        reason:
          type: string
          readOnly: true
          description: Reason for the condition's last transition
        message:
          type: string
          readOnly: true
          description: Human-readable message indicating details about the condition
        zone:
          type: string
          readOnly: true
          description: Zone associated with the condition
        status:
          type: string
          readOnly: true
          # 'True' and 'False' are quoted so they stay strings, not booleans.
          enum:
            - 'True'
            - 'False'
            - Unknown
          description: >-
            Whether the condition is currently `True`, `False`, or
            `Unknown`. Mirrors the Kubernetes condition convention.
  # HTTP bearer authentication, referenced by the top-level `security` entry.
  securitySchemes:
    bearerAuth:
      description: CoreWeave API access token sent as a bearer token.
      type: http
      scheme: bearer
      bearerFormat: JWT
      # Vendor extension carrying the placeholder header value.
      x-default: 'Bearer {API_ACCESS_TOKEN}'

````