---
# OpenAPI 3.1 description of the Anvat gateway's public HTTP surface.
openapi: 3.1.0
info:
  title: Anvat Gateway API
  summary: >-
    OpenAI- and Anthropic-compatible AI gateway with metered billing and
    prepaid credit.
  # Quoted so YAML loaders keep the release tag a string instead of a date.
  version: "2026-06-06"
  termsOfService: https://anvat.app/terms
  contact:
    name: Anvat support
    url: https://anvat.app
    email: hello@anvat.app
  license:
    name: Proprietary
    url: https://anvat.app/terms
  # Markdown overview: authentication, compatibility, billing, rate limits.
  description: |
    Anvat is a drop-in API gateway for Claude Code, Cursor, Codex, and any
    Anthropic / OpenAI SDK client. Point one base URL at `https://api.anvat.app`
    and call frontier models at 30% below published list prices, with 2× credit
    matching on prepaid packs.

    ## Authentication

    All endpoints accept either header style — pick the one your SDK already
    sends.

    - **Bearer:** `Authorization: Bearer <ANVAT_API_KEY>` (OpenAI SDK default)
    - **x-api-key:** `x-api-key: <ANVAT_API_KEY>` (Anthropic SDK default,
      Claude Code default)

    Get a key from <https://anvat.app/app/keys>.

    ## Compatibility

    - Anthropic: `POST /v1/messages` is byte-compatible with
      `https://api.anthropic.com/v1/messages`.
    - OpenAI: `POST /v1/chat/completions` and `POST /v1/responses` mirror the
      official OpenAI surface — same request body, same SSE event order.
    - Model discovery: `GET /v1/models` returns OpenAI-shaped or Anthropic-shaped
      payloads based on caller-supplied headers (`anthropic-version`, `x-api-key`,
      User-Agent containing `claude-code`).

    ## Billing

    Every successful request returns a `x-anvat-cost-usd` header with the
    metered cost in USD (post-discount). Failed requests are not charged.
    Subscribers consume their monthly credit grant first; metered tokens roll
    over to credit balance once the grant is exhausted.

    ## Rate limits

    Returned as standard headers on every response:

    - `x-anvat-rpm-remaining` — requests left in the rolling minute window
    - `x-anvat-concurrent-remaining` — concurrent in-flight requests left
    - `x-anvat-window-spend-remaining-usd` — rolling-spend cap remaining

    Limits scale with your subscription tier. See
    <https://anvat.app/blog/cheap-claude-api-2026> for current numbers.

# The one base URL that clients are pointed at.
servers:
  - description: Production gateway
    url: https://api.anvat.app

# Default authentication for every operation. The entries are alternatives:
# satisfying either one is enough. Operations may override this list.
security:
  - bearerAuth: []
  - anthropicKey: []

# Tag catalog; each operation under `paths` references one of these names.
tags:
  - name: anthropic
    description: >-
      Anthropic-compatible Messages API. Use this with Claude Code, the
      Anthropic SDK, or any client that targets `api.anthropic.com`.
  - name: openai
    description: >-
      OpenAI-compatible Chat Completions and Responses API. Use this with
      Cursor, the OpenAI SDK, LangChain, LiteLLM, or any client that targets
      `api.openai.com`.
  - name: discovery
    description: Model and capability discovery.
  - name: billing
    description: Caller balance and usage metering.
  - name: ops
    description: Operational health checks.

paths:
  # Anthropic-shaped message creation. The 200 response is declared both as a
  # JSON document and as an SSE stream.
  /v1/messages:
    post:
      operationId: createMessage
      tags:
        - anthropic
      summary: Create a Claude message (Anthropic-compatible)
      description: |
        Drop-in replacement for `POST https://api.anthropic.com/v1/messages`.
        Streaming (SSE) is supported via `"stream": true` — events match
        Anthropic's `message_start`, `content_block_*`, `message_delta`,
        `message_stop` schema exactly.

        Tool use, prompt caching (`cache_control: { type: "ephemeral" }`),
        and extended thinking (`thinking: { type: "enabled", budget_tokens: N }`)
        all work without modification.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/AnthropicMessageRequest"
            examples:
              # Plain (non-streaming) request.
              opus48-coding:
                summary: Opus 4.8 — code refactor
                value:
                  model: claude-opus-4-8
                  max_tokens: 4096
                  messages:
                    - content: "Refactor the bubble-sort below to use merge-sort in TypeScript..."
                      role: user
              # Same shape with `stream: true` to request SSE.
              sonnet46-streaming:
                summary: Sonnet 4.6 — streaming
                value:
                  model: claude-sonnet-4-6
                  max_tokens: 1024
                  stream: true
                  messages:
                    - content: "Explain prompt caching in two paragraphs."
                      role: user
      responses:
        "200":
          description: Message created.
          headers:
            x-anvat-cost-usd:
              $ref: "#/components/headers/CostUsd"
            x-anvat-rpm-remaining:
              $ref: "#/components/headers/RpmRemaining"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/AnthropicMessageResponse"
            text/event-stream:
              schema:
                description: SSE stream. Each event is a JSON payload prefixed with `event:` and `data:`.
                type: string
        # Error bodies are shared definitions under components/responses.
        "401":
          $ref: "#/components/responses/Unauthorized"
        "402":
          $ref: "#/components/responses/InsufficientCredit"
        "429":
          $ref: "#/components/responses/RateLimited"

  # Token counting for an Anthropic-style message body; per the summary the
  # body is not dispatched.
  /v1/messages/count_tokens:
    post:
      tags: [anthropic]
      summary: Count tokens for an Anthropic message body without dispatching it
      operationId: countTokens
      requestBody:
        required: true
        content:
          application/json:
            schema:
              # Deliberately not a plain $ref to AnthropicMessageRequest: that
              # schema lists `max_tokens` as required, but `max_tokens` is a
              # generation parameter and is not part of Anthropic's
              # count_tokens request, so real client bodies would fail
              # validation. The property definitions are reused by pointer so
              # the two request shapes cannot drift apart.
              type: object
              required: [model, messages]
              properties:
                model:
                  $ref: "#/components/schemas/AnthropicMessageRequest/properties/model"
                messages:
                  $ref: "#/components/schemas/AnthropicMessageRequest/properties/messages"
                system:
                  $ref: "#/components/schemas/AnthropicMessageRequest/properties/system"
                tools:
                  $ref: "#/components/schemas/AnthropicMessageRequest/properties/tools"
                tool_choice:
                  $ref: "#/components/schemas/AnthropicMessageRequest/properties/tool_choice"
                thinking:
                  $ref: "#/components/schemas/AnthropicMessageRequest/properties/thinking"
      responses:
        "200":
          description: Token count.
          content:
            application/json:
              schema:
                type: object
                properties:
                  input_tokens:
                    type: integer
        # The operation inherits the global security requirement and the
        # gateway-wide rate limits, so the matching error responses are
        # declared the same way as on /v1/messages.
        "401":
          $ref: "#/components/responses/Unauthorized"
        "429":
          $ref: "#/components/responses/RateLimited"

  # OpenAI-shaped chat completion. The 200 response is declared both as a JSON
  # document and as an SSE stream.
  /v1/chat/completions:
    post:
      tags: [openai]
      summary: Chat completion (OpenAI-compatible)
      description: |
        Drop-in replacement for `POST https://api.openai.com/v1/chat/completions`.
        Streaming, tool calls, and JSON mode are all supported with the same
        payload shape.
      operationId: createChatCompletion
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/ChatCompletionRequest"
            examples:
              gpt5:
                # Summary now names the model actually used in `value`
                # (`gpt-5`); it previously read "GPT-5.5".
                summary: GPT-5 — JSON mode
                value:
                  model: gpt-5
                  messages:
                    - role: user
                      content: "Return a JSON object with keys name + city."
                  response_format:
                    type: json_object
              claude-via-openai:
                summary: Call Claude through the OpenAI shape
                value:
                  model: claude-sonnet-4-6
                  messages:
                    - role: user
                      content: "What is prompt caching?"
      responses:
        "200":
          description: Chat completion.
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ChatCompletionResponse"
            text/event-stream:
              schema:
                type: string
          headers:
            x-anvat-cost-usd:
              $ref: "#/components/headers/CostUsd"
            # Declared for parity with /v1/messages; the API description says
            # rate-limit headers accompany every response.
            x-anvat-rpm-remaining:
              $ref: "#/components/headers/RpmRemaining"
        "401":
          $ref: "#/components/responses/Unauthorized"
        "402":
          $ref: "#/components/responses/InsufficientCredit"
        # Rate limiting is gateway-wide, so 429 is documented here as well.
        "429":
          $ref: "#/components/responses/RateLimited"

  # OpenAI-shaped Responses endpoint. Request and response bodies are left as
  # free-form objects; only the transport-level contract is pinned down here.
  /v1/responses:
    post:
      tags: [openai]
      summary: Responses API (OpenAI-compatible)
      description: |
        Drop-in replacement for `POST https://api.openai.com/v1/responses`.
        Use this for OpenAI's reasoning models and tool-orchestration patterns.
      operationId: createResponse
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              additionalProperties: true
      responses:
        "200":
          description: Response object.
          content:
            application/json:
              schema:
                type: object
                additionalProperties: true
            # The API description promises the same SSE event order as
            # OpenAI for this endpoint, so the streaming media type is
            # declared alongside the JSON one.
            text/event-stream:
              schema:
                type: string
          # Billing and rate-limit headers, as on the other inference
          # endpoints.
          headers:
            x-anvat-cost-usd:
              $ref: "#/components/headers/CostUsd"
            x-anvat-rpm-remaining:
              $ref: "#/components/headers/RpmRemaining"
        # Same shared error responses as /v1/messages.
        "401":
          $ref: "#/components/responses/Unauthorized"
        "402":
          $ref: "#/components/responses/InsufficientCredit"
        "429":
          $ref: "#/components/responses/RateLimited"

  # Model catalog whose payload shape depends on request headers.
  /v1/models:
    get:
      tags: [discovery]
      summary: List available models
      description: |
        Returns the catalog of models reachable through Anvat. The response
        shape auto-adapts to the caller:

        - **OpenAI-style** by default — `{ object: "list", data: [...] }`.
        - **Anthropic-style** when the request carries `anthropic-version`,
          `x-api-key`, or a Claude Code User-Agent —
          `{ data: [{ type: "model", id, display_name, created_at }], has_more }`.
      operationId: listModels
      parameters:
        # Makes one of the shape switches named in the description visible to
        # tooling. Optional, so existing callers are unaffected.
        - name: anthropic-version
          in: header
          required: false
          description: When present, the Anthropic-style payload is returned.
          schema:
            type: string
      responses:
        "200":
          description: Model catalog.
          content:
            application/json:
              schema:
                # anyOf rather than oneOf: neither list schema declares
                # required fields or forbids extra ones, so any object
                # satisfies both. oneOf demands exactly one match and would
                # therefore reject every real payload.
                anyOf:
                  - $ref: "#/components/schemas/OpenAIModelList"
                  - $ref: "#/components/schemas/AnthropicModelList"

  # Credit balance lookup that, per its description, also answers anonymous
  # callers.
  /v1/balance:
    get:
      tags: [billing]
      summary: Caller credit balance (USD)
      description: |
        Used by Claude Code's `statusLine` and lightweight balance widgets.
        Returns `null` for unauthenticated callers instead of erroring so
        the widget renders neutrally.
      operationId: getBalance
      # Authentication is optional here. The empty requirement `{}` admits
      # requests without credentials, matching the description's promise of a
      # `null` result for unauthenticated callers; the two named schemes
      # remain valid ways to identify the caller. Without this override the
      # operation would inherit the global requirement and tooling would
      # treat a key as mandatory.
      security:
        - {}
        - bearerAuth: []
        - anthropicKey: []
      responses:
        "200":
          description: Balance.
          content:
            application/json:
              schema:
                type: object
                properties:
                  balance_usd:
                    # Nullable, which is how the unauthenticated case is
                    # represented.
                    type: [number, "null"]
                    example: 124.83
                  currency:
                    type: string
                    example: USD
                  updated_at:
                    type: string
                    format: date-time

  # Liveness endpoint; `security: []` clears the global auth requirement.
  /healthz:
    get:
      operationId: getHealth
      tags:
        - ops
      summary: Liveness probe
      security: []
      responses:
        "200":
          description: Healthy.
          content:
            application/json:
              schema:
                type: object
                properties:
                  status:
                    type: string
                    example: ok
                  uptimeSeconds: { type: integer }

  # Readiness endpoint, also exempt from auth. Only the success status is
  # described and it carries no body schema.
  /readyz:
    get:
      operationId: getReady
      tags:
        - ops
      summary: Readiness probe
      security: []
      responses:
        "200":
          description: Ready.

components:
  securitySchemes:
    # API key presented as an HTTP bearer token.
    bearerAuth:
      description: |
        OpenAI-style: `Authorization: Bearer sk-anvat-...`. Used by the OpenAI
        SDK, Cursor, LangChain, LiteLLM, etc.
      type: http
      scheme: bearer
      bearerFormat: anvat_key
    # API key presented in the `x-api-key` request header.
    anthropicKey:
      description: |
        Anthropic-style: `x-api-key: sk-anvat-...`. Used by the Anthropic SDK,
        Claude Code, and most Anthropic-compatible clients.
      type: apiKey
      name: x-api-key
      in: header

  headers:
    # Typed as a string; the example shows a decimal USD amount.
    CostUsd:
      schema:
        type: string
        example: "0.00427"
      description: Metered cost of this request in USD (post-discount).
    # Integer counter for the per-minute request budget.
    RpmRemaining:
      schema:
        type: integer
        example: 287
      description: Requests left in the rolling 60-second window.

  schemas:
    # Request body for the Anthropic-compatible Messages endpoint. Properties
    # not listed here are still accepted because additional properties are
    # not forbidden.
    AnthropicMessageRequest:
      type: object
      required: [model, max_tokens, messages]
      properties:
        model:
          type: string
          examples:
            - claude-opus-4-8
            - claude-sonnet-4-6
            - claude-haiku-4-5
        max_tokens:
          type: integer
          minimum: 1
          maximum: 200000
        messages:
          type: array
          items:
            type: object
            # Each turn must carry both fields. Without this an empty object
            # `{}` validated as a message even though the Messages API cannot
            # process a turn that lacks a role or content.
            required: [role, content]
            properties:
              role:
                type: string
                enum: [user, assistant]
              content:
                # Plain text, or a list of content-block objects.
                oneOf:
                  - type: string
                  - type: array
                    items:
                      type: object
        system:
          oneOf:
            - type: string
            - type: array
        stream:
          type: boolean
          default: false
        temperature:
          type: number
          minimum: 0
          maximum: 1
        tools:
          type: array
        tool_choice:
          type: object
        thinking:
          type: object
          properties:
            type:
              type: string
              enum: [enabled, disabled]
            budget_tokens:
              type: integer

    # JSON body of a successful Messages call (the `application/json`
    # variant of the 200 response).
    AnthropicMessageResponse:
      type: object
      properties:
        id: { type: string }
        type:
          type: string
          example: message
        role:
          type: string
          example: assistant
        model: { type: string }
        content:
          type: array
          items: { type: object }
        stop_reason: { type: string }
        # Token counters, including the two cache-related ones.
        usage:
          type: object
          properties:
            input_tokens: { type: integer }
            output_tokens: { type: integer }
            cache_creation_input_tokens: { type: integer }
            cache_read_input_tokens: { type: integer }

    # Request body for the OpenAI-compatible Chat Completions endpoint.
    # Properties not listed here are still accepted because additional
    # properties are not forbidden.
    ChatCompletionRequest:
      type: object
      required: [model, messages]
      properties:
        model:
          type: string
          examples:
            - gpt-5
            - gpt-5-mini
            - claude-sonnet-4-6
            - gemini-3-5-flash
            - deepseek-v4-pro
        messages:
          type: array
          items:
            type: object
            properties:
              role:
                type: string
                # `developer` is part of the OpenAI chat message roles; leaving
                # it out made the schema reject bodies the upstream API
                # accepts, contradicting the "same request body" claim.
                # NOTE(review): confirm the gateway forwards this role.
                enum: [system, developer, user, assistant, tool]
              content:
                # `null` covers assistant turns that contain only tool calls,
                # which OpenAI clients serialize with `content: null` when
                # replaying a conversation.
                oneOf:
                  - type: string
                  - type: array
                  - type: "null"
        max_tokens:
          type: integer
        stream:
          type: boolean
        temperature:
          type: number
        tools:
          type: array
        tool_choice:
          # Either a mode keyword or an object naming a specific tool.
          oneOf:
            - type: string
            - type: object
        response_format:
          type: object

    # JSON body of a successful chat completion (the `application/json`
    # variant of the 200 response).
    ChatCompletionResponse:
      type: object
      properties:
        id: { type: string }
        object:
          type: string
          example: chat.completion
        created: { type: integer }
        model: { type: string }
        choices:
          type: array
          items:
            type: object
            properties:
              index: { type: integer }
              message: { type: object }
              finish_reason: { type: string }
        # Prompt, completion and combined token counters.
        usage:
          type: object
          properties:
            prompt_tokens: { type: integer }
            completion_tokens: { type: integer }
            total_tokens: { type: integer }

    # OpenAI-shaped catalog: an `object`/`data` envelope around model entries.
    OpenAIModelList:
      type: object
      properties:
        object:
          type: string
          example: list
        data:
          type: array
          items:
            type: object
            properties:
              id: { type: string }
              object:
                type: string
                example: model
              owned_by: { type: string }
              context_length: { type: integer }
              max_output_tokens: { type: integer }

    # Anthropic-shaped catalog: a `data` array plus a `has_more` flag.
    AnthropicModelList:
      type: object
      properties:
        data:
          type: array
          items:
            type: object
            properties:
              type:
                type: string
                example: model
              id: { type: string }
              display_name: { type: string }
              created_at:
                type: string
                format: date-time
        has_more: { type: boolean }

    # Error body used by the reusable responses: one `error` object holding a
    # type and a message.
    ErrorEnvelope:
      type: object
      properties:
        error:
          type: object
          properties:
            type: { type: string }
            message: { type: string }

  responses:
    # Referenced as the 401 response by operations.
    Unauthorized:
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/ErrorEnvelope"
      description: Missing or invalid API key.
    # Referenced as the 402 response by operations.
    InsufficientCredit:
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/ErrorEnvelope"
      description: Caller has insufficient credit balance to satisfy the request.
    # Referenced as the 429 response by operations.
    RateLimited:
      description: Rate limit (RPM, concurrent, or rolling-spend) exhausted.
      headers:
        retry-after:
          # The description belongs on the Header Object so that it documents
          # the header itself; nested under `schema` it annotated only the
          # value type.
          description: Seconds to wait before retrying.
          schema:
            type: integer
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/ErrorEnvelope"

# Link to long-form documentation kept outside this specification.
externalDocs:
  url: https://anvat.app/guide
  description: Anvat setup guide + use cases
