gpu-workload/t5/kubernetes/application.yaml (56 lines of code) (raw):
# Deployment for the "fastdash" application: one replica of a single container
# that listens on port 8050 and receives the t5-inference endpoint URLs through
# environment variables.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: fastdash
  labels:
    app: fastdash
spec:
  replicas: 1
  selector:
    # Must stay in sync with the pod template labels below.
    matchLabels:
      app: fastdash
  template:
    metadata:
      labels:
        app: fastdash
    spec:
      containers:
        - name: main
          # NOTE(review): a mutable `latest` tag combined with IfNotPresent
          # means a node that already has the image cached will not pull a
          # newer push. Consider pinning a version tag or digest.
          image: gcr.io/lustrous-baton-363720/apps/fastdash:latest
          # Presumably a placeholder substituted by a build/deploy step;
          # confirm before re-enabling.
          # image: APP_IMAGE
          imagePullPolicy: IfNotPresent
          env:
            # URL the app uses for inference requests.
            - name: MODEL_PREDICTION
              value: "http://t5-inference:8080/predictions/t5-small/1.0"
            # NOTE(review): this points at port 8081 but keeps a
            # /predictions/ path prefix; verify it matches the serving
            # backend's management API.
            - name: MODEL_MANAGEMENT
              value: "http://t5-inference:8081/predictions/models/t5-small"
          # Requests equal limits for cpu and memory, which gives the pod
          # the Guaranteed QoS class.
          resources:
            limits:
              cpu: 250m
              memory: 512Mi
              ephemeral-storage: 1Gi
            requests:
              cpu: 250m
              memory: 512Mi
              ephemeral-storage: 1Gi
          ports:
            # Named so that the probe below (and any Service) can refer to
            # the port as "http" instead of repeating the number.
            - containerPort: 8050
              name: http
          # The pod is marked Ready only once GET / on the "http" port
          # succeeds. No timing fields are set, so Kubernetes defaults apply.
          readinessProbe:
            httpGet:
              path: /
              port: http
---
# ClusterIP Service that exposes pods labelled app=fastdash inside the cluster
# on port 8050.
apiVersion: v1
kind: Service
metadata:
  name: fastdash
  labels:
    app: fastdash
spec:
  type: ClusterIP
  # Selects the pods that carry the app=fastdash label.
  selector:
    app: fastdash
  ports:
    - port: 8050
      name: http
      # Resolved by name: each selected pod must declare a container port
      # called "http"; traffic is forwarded to that port.
      targetPort: http