Introduces a provider abstraction so Axon can proxy to either the Claude SDK
(existing behavior) or a vLLM-compatible endpoint. Toggled via the
AXON_PROVIDER env var ("claude" | "vllm"). When set to "vllm", requests pass
through as-is (no prompt translation), and the session pool and OAuth are skipped.
Closes openova-io/openova#36
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
53 lines
977 B
YAML
53 lines
977 B
YAML
# Container image reference for Axon.
image:
  repository: ghcr.io/openova-io/openova/axon
  # NOTE(review): `latest` is a mutable tag; combined with IfNotPresent, a node
  # that already has the image cached will presumably not pull a newer build.
  # Consider pinning a version or digest -- confirm against the release process.
  tag: latest
  pullPolicy: IfNotPresent
  # Name of the image pull secret (presumably for ghcr.io) -- TODO confirm.
  pullSecretName: ghcr-pull-secret

# Axon service settings.
# NOTE(review): the nesting of `tokenRefresh` and `vllm` under `axon` is
# reconstructed from key grouping -- verify against the chart templates.
axon:
  port: 3000
  # "claude" or "vllm"
  provider: claude
  defaultModel: claude-sonnet-4-6
  poolSize: 2
  # 604800 = 7 * 24 * 3600; presumably a TTL in seconds (7 days) -- TODO confirm.
  conversationTtl: 604800
  existingSecret: axon-secrets
  claudeAuthSecret: axon-claude-auth
  tokenRefresh:
    enabled: false
    # Every 4 hours
    schedule: '0 */4 * * *'
  # Settings for the vLLM provider (presumably only read when provider is
  # "vllm" -- TODO confirm). baseUrl is empty by default.
  vllm:
    baseUrl: ''
    defaultModel: qwen3-coder
    existingSecret: axon-vllm-secret

# Valkey instance settings; `enabled` toggles it.
valkey:
  enabled: true
  # Quoted because the image reference contains a colon.
  image: 'valkey/valkey:8-alpine'
  # Presumably the size of the persistent volume -- TODO confirm.
  storage: 1Gi

# Ingress settings: host/path routing and TLS.
ingress:
  enabled: true
  className: traefik
  host: api.openova.io
  path: /axon
  tls:
    enabled: true
    # Presumably a cert-manager ClusterIssuer name -- TODO confirm.
    clusterIssuer: letsencrypt-prod
    secretName: api-openova-io-tls

# Resource requests and limits, keyed by component (axon, valkey).
# Quantities are quoted so they are always read as strings.
resources:
  axon:
    requests:
      cpu: '50m'
      memory: '128Mi'
    limits:
      cpu: '2000m'
      memory: '2Gi'
  valkey:
    requests:
      cpu: '10m'
      memory: '32Mi'
    limits:
      cpu: '500m'
      memory: '256Mi'