# Source: ai-ml/t5-model-serving/kubernetes/serving-cpu.yaml
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
# Deployment running a single CPU replica of the t5-small model server.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: t5-inference
  labels:
    model: t5
    version: v1.0
    machine: cpu
spec:
  replicas: 1
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      model: t5
      version: v1.0
      machine: cpu
  template:
    metadata:
      labels:
        model: t5
        version: v1.0
        machine: cpu
    spec:
      # Run the container process as UID/GID 1000 and apply the same group
      # to mounted volumes.
      securityContext:
        fsGroup: 1000
        runAsUser: 1000
        runAsGroup: 1000
      containers:
        - name: inference
          # NOTE(review): PROJECT_ID looks like a placeholder; substitute the
          # real project before applying.
          image: us-central1-docker.pkg.dev/PROJECT_ID/models/t5-small:1.0-cpu
          imagePullPolicy: IfNotPresent
          args: ["torchserve", "--start", "--foreground"]
          # Requests equal limits, so the scheduler reserves exactly what the
          # container is allowed to use.
          resources:
            limits:
              cpu: "3000m"
              memory: 16Gi
              ephemeral-storage: 10Gi
            requests:
              cpu: "3000m"
              memory: 16Gi
              ephemeral-storage: 10Gi
          # Ports are named so the probes below and the Service in this file
          # can refer to them by name instead of by number.
          ports:
            - containerPort: 8080
              name: http
            - containerPort: 8081
              name: management
            - containerPort: 8082
              name: metrics
          # The pod receives traffic only once GET /ping on the http port
          # succeeds; the first check runs after 120 s.
          # Presumably /ping is TorchServe's health endpoint; TODO confirm.
          readinessProbe:
            httpGet:
              path: /ping
              port: http
            initialDelaySeconds: 120
            failureThreshold: 10
          # The container is restarted when GET /models/t5-small on the
          # management port keeps failing; checked every 5 s after 150 s.
          # Presumably this endpoint reports the registered model's status;
          # TODO confirm against the TorchServe management API.
          livenessProbe:
            httpGet:
              path: /models/t5-small
              port: management
            initialDelaySeconds: 150
            periodSeconds: 5
---
# Cluster-internal Service in front of the t5-inference pods.
apiVersion: v1
kind: Service
metadata:
  name: t5-inference
  labels:
    model: t5
    version: v1.0
    machine: cpu
spec:
  type: ClusterIP
  # Selects pods carrying the same three labels as the Deployment's pod
  # template in this file.
  selector:
    model: t5
    version: v1.0
    machine: cpu
  # Each Service port forwards to the container port of the same name, so
  # the numeric container ports are defined only in the Deployment.
  ports:
    - port: 8080
      name: http
      targetPort: http
    - port: 8081
      name: management
      targetPort: management
    - port: 8082
      name: metrics
      targetPort: metrics