企业级 Java AI 应用架构设计
构建稳定、可扩展的企业级 AI 应用需要考虑架构、性能、安全等多方面因素。
架构原则
1. 分层架构
┌─────────────────────────────────────┐
│ Presentation Layer (API Gateway) │
├─────────────────────────────────────┤
│ Application Layer (Services) │
├─────────────────────────────────────┤
│ Domain Layer (AI Core) │
├─────────────────────────────────────┤
│ Infrastructure Layer (Data/ML) │
└─────────────────────────────────────┘
各层职责:
- Presentation Layer: API 网关、认证、限流
- Application Layer: 业务逻辑、编排、缓存
- Domain Layer: AI 模型、Prompt 工程、RAG
- Infrastructure Layer: 向量存储、模型部署、监控
2. 六边形架构
java
// The domain model does not depend on infrastructure.
/**
 * Chat use case written against abstractions only: it works with whatever
 * {@code ChatModel}, {@code ChatMemory} and {@code VectorStore} it is given.
 */
@Service
public class ChatService {
    private final ChatModel model;
    private final ChatMemory memory;
    private final VectorStore store;

    /**
     * Stores the three collaborators supplied by the caller (constructor injection).
     *
     * @param model  model used to generate the reply
     * @param memory per-user conversation memory
     * @param store  store queried with the user's message
     */
    public ChatService(ChatModel model, ChatMemory memory, VectorStore store) {
        this.store = store;
        this.memory = memory;
        this.model = model;
    }

    /**
     * Handles one user message: searches the store with it, appends it to the
     * user's memory, then asks the model for a reply.
     *
     * @param userId  key under which the conversation memory is kept
     * @param message the user's message; also used as the search query
     * @return the model's output for the retrieved documents plus the user's memory
     */
    public String chat(String userId, String message) {
        // Collaborators are injected abstractions; no concrete implementation is referenced here.
        // The search runs before the message is written to memory.
        List<Document> retrieved = store.search(message);
        memory.add(userId, message);
        return model.generate(retrieved, memory.get(userId));
    }
}
核心组件设计
1. 模型服务层
java
/**
 * Name-indexed registry of {@code ChatModel} instances plus a keyword-based router.
 */
@Service
public class ModelService {
    private final Map<String, ChatModel> models;

    /**
     * Builds the registry, keying each model by the value {@code getModelName} returns for it.
     *
     * @param modelList models to register
     */
    public ModelService(List<ChatModel> modelList) {
        this.models = modelList
                .stream()
                .collect(Collectors.toMap(this::getModelName, Function.identity()));
    }

    /**
     * Generates a completion with the named model.
     *
     * @param modelName registry key of the model to use
     * @param prompt    text passed to the model
     * @return the model's output
     * @throws ModelNotFoundException if no model is registered under {@code modelName}
     */
    @Cacheable("model-response")
    public String generate(String modelName, String prompt) {
        ChatModel selected = models.get(modelName);
        if (selected != null) {
            return selected.generate(prompt);
        }
        throw new ModelNotFoundException(modelName);
    }

    /**
     * Model routing: picks a model name from keywords in the prompt and delegates
     * to {@link #generate(String, String)}.
     *
     * <p>NOTE(review): the delegation is a call on {@code this}; with proxy-based Spring
     * caching such internal calls usually do not pass through {@code @Cacheable} - confirm.
     *
     * @param prompt text to route and generate from
     * @return the output of the selected model
     */
    public String routeAndGenerate(String prompt) {
        final String target;
        if (prompt.contains("代码")) {
            target = "gpt-4";
        } else if (prompt.contains("翻译")) {
            target = "gpt-3.5-turbo";
        } else {
            // Default model; currently the same one used for translation prompts.
            target = "gpt-3.5-turbo";
        }
        return generate(target, prompt);
    }
}
2. 向量存储层
java
/**
 * Writes text into an embedding store and reads it back with a hybrid
 * (semantic + keyword) search.
 */
@Service
public class VectorStoreService {
    private final EmbeddingModel embeddingModel;
    private final EmbeddingStore<TextSegment> store;

    /**
     * Constructor injection, matching the other services in this document; without it
     * the two final fields would never be assigned.
     *
     * @param embeddingModel model used to embed both documents and queries
     * @param store          store that holds the embedded segments
     */
    public VectorStoreService(EmbeddingModel embeddingModel, EmbeddingStore<TextSegment> store) {
        this.embeddingModel = embeddingModel;
        this.store = store;
    }

    /**
     * Batch indexing: embeds all documents in one {@code embedAll} call and stores them
     * with one {@code addAll} call, instead of one model call and one store write per
     * document from a parallel stream.
     *
     * @param documents raw texts to index; {@code null} or empty is a no-op
     */
    @Async
    public void batchIndex(List<String> documents) {
        if (documents == null || documents.isEmpty()) {
            return;
        }
        List<TextSegment> segments = documents.stream()
                .map(TextSegment::from)
                .collect(Collectors.toList());
        // embedAll returns the embeddings in the same order as the segments passed in,
        // so the two lists line up index by index for addAll.
        List<Embedding> embeddings = embeddingModel.embedAll(segments).content();
        store.addAll(embeddings, segments);
    }

    /**
     * Hybrid retrieval: semantic matches from the embedding store combined with
     * keyword matches.
     *
     * @param query user query text
     * @param topK  maximum number of results requested from each retriever
     * @return the merged, ranked documents produced by {@code mergeAndRank}
     */
    public List<Document> hybridSearch(String query, int topK) {
        // Semantic retrieval: the store is searched by vector, so the query text
        // has to be embedded first (the raw String cannot be passed to findRelevant).
        Embedding queryEmbedding = embeddingModel.embed(query).content();
        List<EmbeddingMatch<TextSegment>> semanticResults =
                store.findRelevant(queryEmbedding, topK);
        // Keyword retrieval (optional)
        List<Document> keywordResults = keywordSearch(query, topK);
        // Fuse and rank
        return mergeAndRank(semanticResults, keywordResults);
    }
}
3. 缓存层
java
/**
 * Cache facade for LLM responses with a scheduled warm-up job.
 */
@Service
public class CacheService {
    private final CacheManager cacheManager;

    /**
     * Constructor injection; without it the final field would never be assigned.
     *
     * @param cacheManager cache manager backing this service
     */
    public CacheService(CacheManager cacheManager) {
        this.cacheManager = cacheManager;
    }

    /**
     * Multi-level cache lookup for a prompt.
     *
     * <p>The cache key is the prompt itself. Keying on {@code prompt.hashCode()} would let
     * two different prompts with the same hash (for example "Aa" and "BB") share one
     * cached response.
     *
     * @param prompt prompt whose cached response is wanted
     * @return the cached response, or {@code null} when no tier has one
     */
    @Cacheable(value = "llm-response", key = "#prompt", unless = "#result == null")
    public String getCache(String prompt) {
        // Intended tiers, in lookup order:
        // L1: local cache (Caffeine)
        // L2: Redis cache
        // L3: database cache
        // TODO: the tier lookups are not implemented in this snippet. Until they are,
        // report a miss; "unless" above keeps that miss from being stored as a value.
        return null;
    }

    /**
     * Cache warm-up: every hour (fixedRate = 3,600,000 ms) looks up each hot prompt.
     *
     * <p>NOTE(review): {@code getCache} is called on {@code this}; with proxy-based Spring
     * caching such internal calls usually bypass {@code @Cacheable}, in which case this
     * job would not populate the cache - confirm and route through the proxy if so.
     */
    @Scheduled(fixedRate = 3600000)
    public void warmUpCache() {
        List<String> hotPrompts = getHotPrompts();
        hotPrompts.forEach(prompt -> getCache(prompt));
    }
}
4. 监控层
java
/**
 * Publishes AI usage metrics (latency, token counts, cost) to a Micrometer registry.
 */
@Component
public class AIClMetrics {
    private final MeterRegistry registry;

    /**
     * Constructor injection; without it the final field would never be assigned.
     *
     * @param registry registry that receives all meters
     */
    public AIClMetrics(MeterRegistry registry) {
        this.registry = registry;
    }

    /**
     * Records one model call on the {@code ai.model.usage} timer.
     *
     * <p>The supplied duration is what gets recorded. Starting a {@code Timer.Sample}
     * here and stopping it immediately would only measure this method itself.
     *
     * @param modelName value of the {@code model} tag
     * @param duration  elapsed time of the call; assumed to be milliseconds -
     *                  TODO confirm the unit against callers
     */
    public void recordModelUsage(String modelName, long duration) {
        Timer.builder("ai.model.usage")
                .tag("model", modelName)
                .register(registry)
                .record(duration, java.util.concurrent.TimeUnit.MILLISECONDS);
    }

    /**
     * Adds the token counts of one call to the input and output token counters.
     *
     * @param modelName    value of the {@code model} tag
     * @param inputTokens  tokens sent to the model
     * @param outputTokens tokens returned by the model
     */
    public void recordTokenUsage(String modelName, int inputTokens, int outputTokens) {
        registry.counter("ai.tokens.input", "model", modelName).increment(inputTokens);
        registry.counter("ai.tokens.output", "model", modelName).increment(outputTokens);
    }

    /**
     * Adds the cost of one call to the cumulative {@code ai.cost} counter.
     *
     * <p>A counter is used rather than a gauge: {@code gauge(name, value, tags)} matches
     * no {@code MeterRegistry} overload, and a gauge over a boxed number only holds a
     * weak reference and is not updated by later calls.
     *
     * @param modelName value of the {@code model} tag
     * @param cost      cost of the call, in the caller's currency unit
     */
    public void recordCost(String modelName, double cost) {
        registry.counter("ai.cost", "model", modelName).increment(cost);
    }
}
安全设计
1. API 认证
java
/**
 * HTTP security rules for the chat API.
 */
@Configuration
public class SecurityConfig {
    /**
     * Builds the filter chain: paths under {@code /api/v1/chat/public/} are open, every
     * other path under {@code /api/v1/chat/} requires authentication, and the app acts
     * as an OAuth2 resource server using the decoder returned by {@code jwtDecoder()}.
     *
     * @param http security builder supplied by the framework
     * @return the built filter chain
     * @throws Exception if configuring or building the chain fails
     */
    @Bean
    public SecurityFilterChain filterChain(HttpSecurity http) throws Exception {
        // Matcher order matters: the public rule is registered before the broader one.
        http.authorizeHttpRequests(auth -> {
            auth.requestMatchers("/api/v1/chat/public/**").permitAll();
            auth.requestMatchers("/api/v1/chat/**").authenticated();
        });
        http.oauth2ResourceServer(oauth2 ->
                oauth2.jwt(jwt -> jwt.jwtDecoder(jwtDecoder())));
        return http.build();
    }
}
2. 内容过滤
java
/**
 * Content safety helpers: a moderation check and masking of sensitive values.
 */
@Service
public class ContentFilterService {
    /** Any run of 11 consecutive digits (unchanged from the original rule). */
    private static final java.util.regex.Pattern ELEVEN_DIGITS =
            java.util.regex.Pattern.compile("\\d{11}");

    /**
     * E-mail address limited to ASCII address characters. The previous pattern
     * {@code \S+@\S+\.\S+} also consumed any non-space text touching the address,
     * so in text written without spaces (e.g. Chinese) a whole sentence could be masked.
     */
    private static final java.util.regex.Pattern EMAIL =
            java.util.regex.Pattern.compile("[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}");

    private final ModerationModel moderationModel;

    /**
     * Constructor injection; without it the final field would never be assigned.
     *
     * @param moderationModel model used by {@link #isSafe(String)}
     */
    public ContentFilterService(ModerationModel moderationModel) {
        this.moderationModel = moderationModel;
    }

    /**
     * Runs the moderation model over the content.
     *
     * @param content text to check
     * @return {@code true} when the moderation result is not flagged
     */
    public boolean isSafe(String content) {
        ModerationResult result = moderationModel.moderate(content);
        return !result.isFlagged();
    }

    /**
     * Masks sensitive values: every 11-digit run becomes eleven asterisks and every
     * e-mail address becomes {@code ***@***.***}; surrounding text is left intact.
     *
     * @param input text to mask; {@code null} and empty input are returned unchanged
     * @return the masked text
     */
    public String filterSensitive(String input) {
        if (input == null || input.isEmpty()) {
            return input;
        }
        String digitsMasked = ELEVEN_DIGITS.matcher(input).replaceAll("***********");
        return EMAIL.matcher(digitsMasked).replaceAll("***@***.***");
    }
}
3. 限流
java
/**
 * Global and per-user request throttling.
 */
@Service
public class RateLimitService {
    /** Per-user ceiling; the original author documents it as 100 requests per minute. */
    private static final int USER_REQUEST_LIMIT = 100;

    private final RateLimiter limiter = RateLimiter.create(10.0); // 10 QPS

    /**
     * Runs the supplier only if a permit is available right now.
     *
     * @param supplier work to run under the global limit
     * @param <T>      result type of the work
     * @return the supplier's result
     * @throws RateLimitExceededException if no permit could be acquired
     */
    public <T> T executeWithRateLimit(Supplier<T> supplier) {
        if (!limiter.tryAcquire()) {
            throw new RateLimitExceededException();
        }
        return supplier.get();
    }

    /**
     * Per-user limit check.
     *
     * <p>Deliberately not {@code @Cacheable}: caching the boolean would freeze the first
     * allow/deny decision for a user until the cache entry is evicted, so a user could
     * stay allowed past the limit or stay blocked after their count has dropped.
     *
     * @param userId user whose request count is checked
     * @return {@code true} while the user's request count is below the limit
     */
    public boolean checkUserLimit(String userId) {
        int count = getUserRequestCount(userId);
        return count < USER_REQUEST_LIMIT;
    }
}
性能优化
1. 异步处理
java
/**
 * Asynchronous and batched access to the chat model.
 */
@Service
public class AsyncChatService {
    private final ChatModel model;

    /**
     * Constructor injection, matching the other services in this document; the original
     * snippet used {@code model} without declaring it.
     *
     * @param model model used to generate replies
     */
    public AsyncChatService(ChatModel model) {
        this.model = model;
    }

    /**
     * Starts generation for one prompt and returns immediately.
     *
     * <p>{@code @Async} is intentionally absent: the method already hands the work to
     * {@code CompletableFuture.supplyAsync}, so adding {@code @Async} would only make a
     * second thread wait for the first, and it would not apply to the internal call
     * from {@link #batchChat(List)} anyway.
     *
     * <p>NOTE(review): {@code supplyAsync} without an executor uses the common pool;
     * consider passing a dedicated executor for blocking model calls.
     *
     * @param prompt text passed to the model
     * @return a future completing with the model's output
     */
    public CompletableFuture<String> chatAsync(String prompt) {
        return CompletableFuture.supplyAsync(() -> model.generate(prompt));
    }

    /**
     * Batch processing: submits every prompt first, then waits for all results.
     *
     * @param prompts prompts to process
     * @return the replies, in the same order as {@code prompts}
     */
    public List<String> batchChat(List<String> prompts) {
        // Phase 1: start all calls so they can overlap.
        List<CompletableFuture<String>> pending = prompts.stream()
                .map(this::chatAsync)
                .collect(Collectors.toList());
        // Phase 2: collect results; join() rethrows a failed call as CompletionException.
        return pending.stream()
                .map(CompletableFuture::join)
                .collect(Collectors.toList());
    }
}
2. 流式输出
java
/**
 * Server-sent-events endpoint that relays model tokens as they arrive.
 */
@RestController
public class StreamController {
    /**
     * Streams the reply to {@code message} as a {@code text/event-stream} response.
     *
     * <p>NOTE(review): {@code model} is not declared in this snippet; it is presumably a
     * field whose {@code generate} returns a {@code TokenStream} - confirm.
     *
     * @param message user message taken from the {@code message} request parameter
     * @return a flux that emits each token, then completes or errors with the stream
     */
    @GetMapping(value = "/stream", produces = MediaType.TEXT_EVENT_STREAM_VALUE)
    public Flux<String> streamChat(@RequestParam String message) {
        return Flux.create(emitter -> {
            TokenStream tokens = model.generate(message);
            // Wire the three callbacks to the emitter, then start the stream.
            tokens.onNext(emitter::next)
                    .onComplete(() -> emitter.complete())
                    .onError(failure -> emitter.error(failure))
                    .start();
        });
    }
}
3. 连接池
java
/**
 * Exposes a pool of {@code ChatModel} instances as a bean.
 */
@Configuration
public class ModelPoolConfig {
    /**
     * Creates the pool from a factory that builds an {@code OpenAiChatModel} on demand
     * and wraps it in a {@code DefaultPooledObject}.
     *
     * <p>NOTE(review): {@code apiKey} is not declared in this snippet; it is presumably
     * an injected configuration value - confirm.
     *
     * @return a pool backed by that factory, with default pool settings
     */
    @Bean
    public ObjectPool<ChatModel> modelPool() {
        BasePooledObjectFactory<ChatModel> factory = new BasePooledObjectFactory<ChatModel>() {
            @Override
            public ChatModel create() {
                return OpenAiChatModel.builder().apiKey(apiKey).build();
            }

            @Override
            public PooledObject<ChatModel> wrap(ChatModel model) {
                return new DefaultPooledObject<>(model);
            }
        };
        return new GenericObjectPool<>(factory);
    }
}
部署架构
单体架构
┌─────────────┐
│ Nginx │
└──────┬──────┘
│
┌──────▼────────────────┐
│ Spring Boot App │
│ - Model Service │
│ - Vector Store │
│ - Cache │
└──────────────────────┘
│
┌──────▼────────────────┐
│ PostgreSQL │
│ + PgVector │
└──────────────────────┘
微服务架构
┌──────────┐
│ API GW │
└────┬─────┘
┌──────────────┼──────────────┐
│ │ │
┌────▼────┐ ┌────▼────┐ ┌───▼─────┐
│ Chat │ │ RAG │ │ Model │
│ Service │ │ Service │ │ Service │
└────┬────┘ └────┬────┘ └────┬────┘
│ │ │
┌────▼────────────────▼────────────▼────┐
│ Vector Store Cluster │
│ (Milvus / Weaviate / Qdrant) │
└───────────────────────────────────────┘
监控和日志
1. Prometheus + Grafana
yaml
# Prometheus scrape configuration
scrape_configs:
  - job_name: 'ai-application'
    # Spring Boot Actuator serves Micrometer metrics at /actuator/prometheus, not at
    # Prometheus' default /metrics (assumes the default Actuator base path).
    metrics_path: '/actuator/prometheus'
    static_configs:
      - targets: ['localhost:8080']
2. ELK Stack
java
/**
 * Structured logging of chat exchanges.
 */
@Slf4j
@Service
public class LoggingService {
    /** Maximum number of characters of prompt/response written to the log line. */
    private static final int PREVIEW_LENGTH = 100;

    /**
     * Logs one chat exchange with truncated prompt and response plus a token count.
     *
     * <p>NOTE(review): prompts and responses may contain personal data; consider masking
     * before logging.
     *
     * @param userId   user who sent the prompt
     * @param prompt   full prompt text
     * @param response full response text
     */
    public void logPrompt(String userId, String prompt, String response) {
        log.info("AI_CHAT | userId: {} | prompt: {} | response: {} | tokens: {}",
                userId,
                preview(prompt),
                preview(response),
                calculateTokens(prompt + response)
        );
    }

    /**
     * Returns at most the first {@link #PREVIEW_LENGTH} characters of the text.
     * An unconditional {@code substring(0, 100)} throws for any shorter string.
     */
    private static String preview(String text) {
        if (text == null) {
            return null;
        }
        return text.substring(0, Math.min(text.length(), PREVIEW_LENGTH));
    }
}
3. Tracing(OpenTelemetry)
java
/**
 * OpenTelemetry tracing setup that exports spans over OTLP/gRPC.
 */
@Configuration
public class TracingConfig {
    /**
     * Builds the SDK with a batching span processor that exports to
     * {@code http://jaeger:4317}, and registers it as the global instance.
     *
     * <p>The tracer provider is attached with {@code setTracerProvider}; the SDK builder
     * has no {@code addTracerProvider} method.
     *
     * @return the configured, globally registered {@code OpenTelemetry} instance
     */
    @Bean
    public OpenTelemetry openTelemetry() {
        return OpenTelemetrySdk.builder()
                .setTracerProvider(SdkTracerProvider.builder()
                        .addSpanProcessor(BatchSpanProcessor.builder(
                                OtlpGrpcSpanExporter.builder()
                                        .setEndpoint("http://jaeger:4317")
                                        .build()
                        ).build())
                        .build())
                .buildAndRegisterGlobal();
    }
}
成本控制
1. Token 计费
java
/**
 * Token-based cost calculation and a scheduled daily cost report.
 */
@Slf4j
@Service
public class CostService {
    /** Price per single token, by model name; each model uses one rate for all tokens. */
    private final Map<String, Double> pricing = Map.of(
            "gpt-4", 0.03 / 1000, // $0.03 per 1K tokens
            "gpt-3.5-turbo", 0.002 / 1000
    );

    /**
     * Computes the cost of a call as (input + output tokens) x the model's per-token price.
     *
     * <p>NOTE(review): {@code double} is kept for interface compatibility; consider
     * {@code BigDecimal} if these figures feed billing.
     *
     * @param model        model name; models without a price entry cost 0.0
     * @param inputTokens  tokens sent to the model
     * @param outputTokens tokens returned by the model
     * @return the cost in dollars
     */
    public double calculateCost(String model, int inputTokens, int outputTokens) {
        return pricing.getOrDefault(model, 0.0) *
                (inputTokens + outputTokens);
    }

    /**
     * Logs the total cost of the day's token usage.
     *
     * <p>Runs once a day at midnight. The previous expression {@code 0 0 * * * *} fires at
     * the top of every hour, which does not match a daily report.
     */
    @Scheduled(cron = "0 0 0 * * *")
    public void reportDailyCost() {
        // Each entry's token count is priced as a whole (passed as inputTokens, with 0 output).
        double total = getDailyTokenUsage().entrySet().stream()
                .mapToDouble(e -> calculateCost(e.getKey(), e.getValue(), 0))
                .sum();
        log.info("Daily cost: ${}", total);
    }
}
2. 预算告警
java
/**
 * Raises an alert when spending would pass the configured budget.
 */
@Service
public class BudgetAlertService {
    private static final double DAILY_BUDGET = 100.0;

    /**
     * Alerts when the cost so far plus the new cost is above the budget.
     *
     * <p>NOTE(review): the budget constant is named "daily" but is compared with the
     * value of {@code getCurrentMonthCost()} - confirm which period is intended.
     *
     * @param cost cost about to be added
     */
    public void checkBudget(double cost) {
        double current = getCurrentMonthCost();
        boolean overBudget = current + cost > DAILY_BUDGET;
        if (!overBudget) {
            return;
        }
        alert("Budget exceeded! Current: $" + current);
        // Could switch to a cheaper model here.
    }
}
最佳实践总结
- 分层架构 - 清晰的职责划分
- 缓存优先 - 减少重复调用
- 异步处理 - 提高吞吐量
- 监控完备 - 及时发现问题
- 安全第一 - 认证、限流、内容过滤
- 成本控制 - 实时监控和告警
- 高可用 - 故障转移和降级
更新时间: 2026-03-14 分类: Java AI 平台 | 架构设计