# gpu-workload/t5/kubernetes/application.yaml

---
# Deployment: runs a single replica of the "fastdash" container, which
# listens on port 8050 and is told where to reach the "t5-inference"
# backend through the two environment variables below.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: fastdash
  labels:
    app: fastdash
spec:
  replicas: 1
  selector:
    matchLabels:
      app: fastdash
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above and the Service
        # selector in the second document.
        app: fastdash
    spec:
      containers:
        - name: main
          image: gcr.io/lustrous-baton-363720/apps/fastdash:latest
          # image: APP_IMAGE
          # NOTE(review): APP_IMAGE is presumably a placeholder replaced
          # by a build/deploy step - confirm before removing.
          #
          # NOTE(review): a mutable ":latest" tag combined with
          # IfNotPresent means a node that already cached the image will
          # not pull a newer build. Consider pinning a version tag or
          # digest.
          imagePullPolicy: IfNotPresent
          env:
            # Prediction endpoint on the t5-inference service, port 8080.
            - name: MODEL_PREDICTION
              value: "http://t5-inference:8080/predictions/t5-small/1.0"
            # Management endpoint on the t5-inference service, port 8081.
            # NOTE(review): the path contains "/predictions/" although it
            # targets the management port - verify against the inference
            # server's management API.
            - name: MODEL_MANAGEMENT
              value: "http://t5-inference:8081/predictions/models/t5-small"
          resources:
            # Requests equal limits for every resource, which gives this
            # single-container pod the Guaranteed QoS class.
            limits:
              cpu: 250m
              memory: 512Mi
              ephemeral-storage: 1Gi
            requests:
              cpu: 250m
              memory: 512Mi
              ephemeral-storage: 1Gi
          ports:
            # Named port; the readiness probe and the Service both refer
            # to it by name ("http") rather than by number.
            - containerPort: 8050
              name: http
          # The pod is marked ready once an HTTP GET on "/" succeeds.
          readinessProbe:
            httpGet:
              path: /
              port: http
---
# Service: cluster-internal (ClusterIP) entry point that forwards
# port 8050 to the "http" named port of pods labelled app=fastdash.
apiVersion: v1
kind: Service
metadata:
  name: fastdash
  labels:
    app: fastdash
spec:
  type: ClusterIP
  selector:
    app: fastdash
  ports:
    - port: 8050
      name: http
      targetPort: http

# gpu-workload/t5/kubernetes/application.yaml (56 lines of code) (raw)