ai-ml/t5-model-serving/kubernetes/serving-cpu.yaml (85 lines of code) (raw):

# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Deployment: runs a single TorchServe replica serving the t5-small model
# on CPU. PROJECT_ID in the image path is a placeholder to be substituted
# before applying this manifest.
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: t5-inference
  labels:
    model: t5
    version: v1.0
    machine: cpu
spec:
  replicas: 1
  selector:
    matchLabels:
      model: t5
      version: v1.0
      machine: cpu
  template:
    metadata:
      labels:
        model: t5
        version: v1.0
        machine: cpu
    spec:
      # Run as a non-root user/group; fsGroup lets mounted volumes be
      # writable by the TorchServe process.
      securityContext:
        fsGroup: 1000
        runAsUser: 1000
        runAsGroup: 1000
      containers:
        - name: inference
          image: us-central1-docker.pkg.dev/PROJECT_ID/models/t5-small:1.0-cpu
          imagePullPolicy: IfNotPresent
          # Keep TorchServe in the foreground so the container stays alive.
          args: ["torchserve", "--start", "--foreground"]
          # Requests == limits: Guaranteed QoS class for stable latency.
          resources:
            limits:
              cpu: "3000m"
              memory: 16Gi
              ephemeral-storage: 10Gi
            requests:
              cpu: "3000m"
              memory: 16Gi
              ephemeral-storage: 10Gi
          # Standard TorchServe ports: inference, management, metrics.
          ports:
            - containerPort: 8080
              name: http
            - containerPort: 8081
              name: management
            - containerPort: 8082
              name: metrics
          # Readiness: TorchServe health endpoint on the inference port.
          # Generous initial delay — model archive loading is slow on CPU.
          readinessProbe:
            httpGet:
              path: /ping
              port: http
            initialDelaySeconds: 120
            failureThreshold: 10
          # Liveness: query the model's status via the management API so the
          # pod restarts if the model itself becomes unavailable.
          livenessProbe:
            httpGet:
              path: /models/t5-small
              port: management
            initialDelaySeconds: 150
            periodSeconds: 5

# Service: cluster-internal access to the three TorchServe ports, selecting
# the pods by the same model/version/machine labels as the Deployment.
---
apiVersion: v1
kind: Service
metadata:
  name: t5-inference
  labels:
    model: t5
    version: v1.0
    machine: cpu
spec:
  type: ClusterIP
  selector:
    model: t5
    version: v1.0
    machine: cpu
  ports:
    - port: 8080
      name: http
      targetPort: http
    - port: 8081
      name: management
      targetPort: management
    - port: 8082
      name: metrics
      targetPort: metrics