Deploy LangChain applications to production with LangServe, Docker, and cloud platforms (Cloud Run, AWS Lambda). Trigger: "deploy langchain", "langchain production deploy", "langchain docker", "langchain cloud run", "LangServe".
Deploy LangChain chains and agents as APIs using LangServe (Python) or custom Express/Fastify servers (Node.js). Covers containerization, cloud deployment, health checks, and production observability.
# serve.py
"""LangServe API exposing two LCEL chains (summarize, QA) plus a health probe.

Each add_routes() call publishes the chain at its path with the standard
LangServe endpoints: /invoke, /batch, /stream, /input_schema, /output_schema.
"""
from fastapi import FastAPI
from langserve import add_routes
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

app = FastAPI(title="LangChain API", version="1.0.0")

# Summarization chain: prompt -> deterministic model (temperature=0) -> plain text.
summarize_chain = (
    ChatPromptTemplate.from_template("Summarize in 3 sentences: {text}")
    | ChatOpenAI(model="gpt-4o-mini", temperature=0)
    | StrOutputParser()
)

# Context-grounded QA chain: the system message restricts answers to the
# supplied context; expects {"context": ..., "question": ...} as input.
qa_chain = (
    ChatPromptTemplate.from_messages([
        ("system", "Answer based on the given context only."),
        ("human", "Context: {context}\n\nQuestion: {question}"),
    ])
    | ChatOpenAI(model="gpt-4o-mini")
    | StrOutputParser()
)

# Auto-generates /invoke, /batch, /stream, /input_schema, /output_schema
add_routes(app, summarize_chain, path="/summarize")
add_routes(app, qa_chain, path="/qa")


@app.get("/health")
async def health():
    # Liveness probe for Docker HEALTHCHECK / Cloud Run / k8s.
    return {"status": "healthy"}


if __name__ == "__main__":
    import os

    import uvicorn

    # Honor the PORT env var — Cloud Run (a stated deploy target) injects the
    # listen port this way; fall back to 8000 for local development.
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", "8000")))
// server.ts
// Express server exposing a summarization chain as JSON and SSE endpoints.
import express from "express";
import { ChatOpenAI } from "@langchain/openai";
import { ChatPromptTemplate } from "@langchain/core/prompts";
import { StringOutputParser } from "@langchain/core/output_parsers";
import "dotenv/config";

const app = express();
app.use(express.json());

const model = new ChatOpenAI({ model: "gpt-4o-mini" });
const summarizeChain = ChatPromptTemplate.fromTemplate("Summarize: {text}")
  .pipe(model)
  .pipe(new StringOutputParser());

// Non-streaming summarization: one request, one JSON response.
app.post("/api/summarize", async (req, res) => {
  try {
    const result = await summarizeChain.invoke({ text: req.body.text });
    res.json({ result });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});

// Streaming endpoint (Server-Sent Events): one `data:` event per chunk.
app.post("/api/summarize/stream", async (req, res) => {
  res.setHeader("Content-Type", "text/event-stream");
  res.setHeader("Cache-Control", "no-cache");
  try {
    const stream = await summarizeChain.stream({ text: req.body.text });
    for await (const chunk of stream) {
      res.write(`data: ${JSON.stringify({ chunk })}\n\n`);
    }
    res.write("data: [DONE]\n\n");
  } catch (error: any) {
    // SSE headers are already sent, so a 500 status is no longer possible —
    // report the failure as an event so the client can detect it, then close.
    res.write(`data: ${JSON.stringify({ error: error.message })}\n\n`);
  }
  res.end();
});

app.get("/health", (_req, res) => res.json({ status: "healthy" }));

// Honor PORT (required by Cloud Run's container contract); default 8000 locally.
const port = Number(process.env.PORT) || 8000;
app.listen(port, () => console.log(`Server running on :${port}`));
# Multi-stage build for Node.js
FROM node:20-slim AS builder
WORKDIR /app
COPY package*.json ./
# Install ALL dependencies (incl. dev) so the TypeScript build can run.
# (--include=dev replaces the deprecated --production=false flag.)
RUN npm ci --include=dev
COPY . .
RUN npm run build

FROM node:20-slim
WORKDIR /app
COPY package*.json ./
# Install production dependencies only — copying the builder's node_modules
# would ship devDependencies (TypeScript toolchain etc.) into the image.
RUN npm ci --omit=dev
COPY --from=builder /app/dist ./dist
ENV NODE_ENV=production
ENV LANGSMITH_TRACING=true
EXPOSE 8000
# node:20-slim does not ship curl, so a curl-based probe would always fail;
# use Node's built-in fetch (available since Node 18) instead.
HEALTHCHECK --interval=30s --timeout=5s \
  CMD node -e "fetch('http://localhost:8000/health').then(r => process.exit(r.ok ? 0 : 1)).catch(() => process.exit(1))"
CMD ["node", "dist/server.js"]