# Your existing code — unchanged
from openai import OpenAI

# Before
client = OpenAI(api_key="sk-...")

# After — this is the only change: a new key plus an explicit base_url
client = OpenAI(
    api_key="vf-your-key",
    base_url="https://api.vectorflo.co/v1",
)

# Everything else stays exactly the same
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "..."}],
    # Optional: tag by feature for dashboard breakdown
    extra_body={"feature_tag": "support-bot"},
)
// Your existing code — unchanged
const { OpenAI } = require("openai")

// Before (kept commented out: declaring `const client` twice in the same
// scope is a SyntaxError, so only the "After" declaration can be live)
// const client = new OpenAI({ apiKey: "sk-..." })

// After — this is the only change
const client = new OpenAI({
  apiKey: "vf-your-key",
  baseURL: "https://api.vectorflo.co/v1"
})

// Everything else stays exactly the same
// (note: `await` must run inside an async function or an ES module)
const response = await client.chat.completions.create({
  model: "gpt-4o",
  messages: [{ role: "user", content: "..." }]
})
# Before
curl https://api.openai.com/v1/chat/completions \
  --header "Authorization: Bearer sk-..." \
  --header "Content-Type: application/json" \
  --data '{"model":"gpt-4o","messages":[...]}'

# After — change base URL and key only
curl https://api.vectorflo.co/v1/chat/completions \
  --header "Authorization: Bearer vf-your-key" \
  --header "Content-Type: application/json" \
  --data '{"model":"gpt-4o","messages":[...]}'
What happens automatically
✓ Identical prompts served from Redis cache
✓ Similar prompts matched via vector search
✓ Simple queries routed to cheaper models
✓ Failed requests retried with fallback model
✓ Every request logged with cost + savings
Works with any framework
LangChain
LlamaIndex
FastAPI
Express
Next.js
Django
Average time to integrate: 4 min, from signup to first optimised request