diff --git a/redhat/ocp4/4.13/4.13.gpu.llm.md b/redhat/ocp4/4.13/4.13.gpu.llm.md index bc14f23f..2b32ca63 100644 --- a/redhat/ocp4/4.13/4.13.gpu.llm.md +++ b/redhat/ocp4/4.13/4.13.gpu.llm.md @@ -1217,6 +1217,197 @@ oc create -f custom-manifests/opendatahub/kserve-dsc.yaml # modelmeshserving to Removed, can not active, do not know why. +``` + +### deploy model + +- https://github.com/wangzheng422/caikit-tgis-serving/blob/main/demo/kserve/deploy-remove.md + +```bash + +# on helper +cd ~/tmp + +cd caikit-tgis-serving/demo/kserve + +export TARGET_OPERATOR=rhods + +source ./scripts/env.sh +source ./scripts/utils.sh +export TARGET_OPERATOR_TYPE=$(getOpType $TARGET_OPERATOR) +export TARGET_OPERATOR_NS=$(getOpNS) +export KSERVE_OPERATOR_NS=$(getKserveNS) + +# Deploy the MinIO image that contains the LLM model. + +ACCESS_KEY_ID=admin +SECRET_ACCESS_KEY=password +MINIO_NS=minio + +oc new-project ${MINIO_NS} +oc apply -f ./custom-manifests/minio/minio.yaml -n ${MINIO_NS} +sed "s//$MINIO_NS/g" ./custom-manifests/minio/minio-secret.yaml | tee ./minio-secret-current.yaml | oc -n ${MINIO_NS} apply -f - +sed "s//$MINIO_NS/g" ./custom-manifests/minio/serviceaccount-minio.yaml | tee ./serviceaccount-minio-current.yaml | oc -n ${MINIO_NS} apply -f - + +# Endpoint: http://10.133.0.36:9000 http://127.0.0.1:9000 +# Browser Access: +# http://10.133.0.36:9000 http://127.0.0.1:9000 +# Object API (Amazon S3 compatible): +# Go: https://docs.min.io/docs/golang-client-quickstart-guide +# Java: https://docs.min.io/docs/java-client-quickstart-guide +# Python: https://docs.min.io/docs/python-client-quickstart-guide +# JavaScript: https://docs.min.io/docs/javascript-client-quickstart-guide +# .NET: https://docs.min.io/docs/dotnet-client-quickstart-guide +# IAM initialization complete + + +# Deploy the LLM model with Caikit+TGIS Serving runtime +export TEST_NS=kserve-demo +oc new-project ${TEST_NS} + +oc apply -f ./custom-manifests/caikit/caikit-tgis-servingruntime.yaml -n ${TEST_NS} + +# to this step, pod is not created + +oc apply -f ./minio-secret-current.yaml -n ${TEST_NS} +oc create -f ./serviceaccount-minio-current.yaml -n ${TEST_NS} + +# change ths url to s3://minio.minio.svc.cluster.local:9000/modelmesh-example-models/llm/models/flan-t5-small-caikit +oc apply -f ./custom-manifests/caikit/caikit-tgis-isvc.yaml -n ${TEST_NS} + + +# Error from server (storageUri, must be one of: [gs://, s3://, pvc://, file://, https://, http://, hdfs://, webhdfs://] or match https://{}.blob.core.windows.net/{}/{} or be an absolute or relative local path. StorageUri [proto://path/to/model] is not supported.): error when creating "./custom-manifests/caikit/caikit-tgis-isvc.yaml": admission webhook "inferenceservice.kserve-webhook-server.validator" denied the request: storageUri, must be one of: [gs://, s3://, pvc://, file://, https://, http://, hdfs://, webhdfs://] or match https://{}.blob.core.windows.net/{}/{} or be an absolute or relative local path. StorageUri [proto://path/to/model] is not supported. + +# oc delete -f ./custom-manifests/caikit/caikit-tgis-isvc.yaml -n ${TEST_NS} + + +# to this step, the pod is created. + +oc get pod -n kserve-demo +# NAME READY STATUS RESTARTS AGE +# caikit-tgis-example-isvc-predictor-00001-deployment-dcbfddrk2hv 4/4 Running 0 30m + +# 这个pod里面有4个 container +# kserve-container 是真正运行llm的容器,镜像是text-generation-inference,他能看到宿主机上所有的gpu,但是奇怪的是,yaml里面,并没有给他授权,后面再慢慢研究。这个容器,向外暴露了grpc, http服务,但是没有对外声明服务。 +# log end with +# Shard 0: Server started at unix:///tmp/text-generation-server-0 +# 2023-11-19T13:51:48.465888Z INFO text_generation_launcher: Shard 0 ready in 3.107531075s +# 2023-11-19T13:51:48.565905Z INFO text_generation_launcher: Starting Router +# 2023-11-19T13:51:48.606326Z INFO text_generation_router: Token decoder: Some(Metaspace(Metaspace { replacement: '▁', add_prefix_space: true, str_rep: "▁" })) +# 2023-11-19T13:51:48.615392Z INFO text_generation_router: Connected +# 2023-11-19T13:51:48.615840Z INFO text_generation_router::server: Shard model info: is_seq2seq = true, eos_token_id = 1, use_padding = true +# 2023-11-19T13:51:48.662055Z INFO text_generation_router::grpc_server: gRPC server started on port 8033 +# 2023-11-19T13:51:50.663300Z INFO text_generation_router::server: HTTP server started on port 3000 + + +# transformer-container 这个运行了一个caikit-tgis-serving镜像的容器,作用应该是提供grpc服务,然后调用kserve-container,这个容器暴露了user-http服务,并且对外声明了。 并且,这个容器应该是看不到GPU的. +# log end with : "Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit)" + +# transformer-container的作用,还要再研究一下。 + +oc get isvc/caikit-tgis-example-isvc -n ${TEST_NS} +# NAME URL READY PREV LATEST PREVROLLEDOUTREVISION LATESTREADYREVISION AGE +# caikit-tgis-example-isvc https://caikit-tgis-example-isvc-predictor-kserve-demo.apps.demo-gpu.wzhlab.top True 100 caikit-tgis-example-isvc-predictor-00001 79m + +oc get isvc/caikit-tgis-example-isvc -n ${TEST_NS} -o yaml +# apiVersion: serving.kserve.io/v1beta1 +# kind: InferenceService +# metadata: +# annotations: +# kubectl.kubernetes.io/last-applied-configuration: | +# {"apiVersion":"serving.kserve.io/v1beta1","kind":"InferenceService","metadata":{"annotations":{"serving.knative.openshift.io/enablePassthrough":"true","sidecar.istio +# .io/inject":"true","sidecar.istio.io/rewriteAppHTTPProbers":"true"},"name":"caikit-tgis-example-isvc","namespace":"kserve-demo"},"spec":{"predictor":{"model":{"modelFormat +# ":{"name":"caikit"},"runtime":"caikit-tgis-runtime","storageUri":"s3://modelmesh-example-models/llm/models/flan-t5-small-caikit"},"serviceAccountName":"sa"}}} +# serving.knative.openshift.io/enablePassthrough: "true" +# sidecar.istio.io/inject: "true" +# sidecar.istio.io/rewriteAppHTTPProbers: "true" +# creationTimestamp: "2023-11-19T13:38:56Z" +# finalizers: +# - inferenceservice.finalizers +# generation: 1 +# name: caikit-tgis-example-isvc +# namespace: kserve-demo +# resourceVersion: "5641996" +# uid: 3683461f-0976-4dcd-8ec2-d5eabec6e3af +# spec: +# predictor: +# model: +# modelFormat: +# name: caikit +# name: "" +# resources: {} +# runtime: caikit-tgis-runtime +# storageUri: s3://modelmesh-example-models/llm/models/flan-t5-small-caikit +# serviceAccountName: sa +# status: +# address: +# url: http://caikit-tgis-example-isvc-predictor.kserve-demo.svc.cluster.local +# components: +# predictor: +# address: +# url: http://caikit-tgis-example-isvc-predictor.kserve-demo.svc.cluster.local +# latestCreatedRevision: caikit-tgis-example-isvc-predictor-00001 +# latestReadyRevision: caikit-tgis-example-isvc-predictor-00001 +# latestRolledoutRevision: caikit-tgis-example-isvc-predictor-00001 +# traffic: +# - latestRevision: true +# percent: 100 +# revisionName: caikit-tgis-example-isvc-predictor-00001 +# url: https://caikit-tgis-example-isvc-predictor-kserve-demo.apps.demo-gpu.wzhlab.top +# conditions: +# - lastTransitionTime: "2023-11-19T13:59:17Z" +# status: "True" +# type: IngressReady +# - lastTransitionTime: "2023-11-19T13:59:16Z" +# severity: Info +# status: "True" +# type: LatestDeploymentReady +# - lastTransitionTime: "2023-11-19T13:59:16Z" +# severity: Info +# status: "True" +# type: PredictorConfigurationReady +# - lastTransitionTime: "2023-11-19T13:59:16Z" +# status: "True" +# type: PredictorReady +# - lastTransitionTime: "2023-11-19T13:59:16Z" +# severity: Info +# status: "True" +# type: PredictorRouteReady +# - lastTransitionTime: "2023-11-19T13:59:17Z" +# status: "True" +# type: Ready +# - lastTransitionTime: "2023-11-19T13:59:16Z" +# severity: Info +# status: "True" +# type: RoutesReady +# modelStatus: +# copies: +# failedCopies: 0 +# totalCopies: 1 +# states: +# activeModelState: Loaded +# targetModelState: Loaded +# transitionStatus: UpToDate +# observedGeneration: 1 +# url: https://caikit-tgis-example-isvc-predictor-kserve-demo.apps.demo-gpu.wzhlab.top + + +# Perform inference with Remote Procedure Call (gPRC) commands. +oc get ingresses.config/cluster -ojson | grep ingress.operator.openshift.io/default-enable-http2 +# empty return + +oc annotate ingresses.config/cluster ingress.operator.openshift.io/default-enable-http2=true + +# https://github.com/fullstorydev/grpcurl/releases +curl -sSL "https://github.com/fullstorydev/grpcurl/releases/download/v1.8.7/grpcurl_1.8.7_linux_x86_64.tar.gz" | sudo tar -xz -C /usr/local/bin + + +export KSVC_HOSTNAME=$(oc get ksvc caikit-tgis-example-isvc-predictor -n ${TEST_NS} -o jsonpath='{.status.url}' | cut -d'/' -f3) +# export KSVC_HOSTNAME=$(oc get ksvc caikit-tgis-example-isvc-predictor -n ${TEST_NS} -o jsonpath='{.status.url}') +echo ${KSVC_HOSTNAME} +grpcurl -vv -insecure -d '{"text": "At what temperature does liquid Nitrogen boil?"}' -H "mm-model-id: flan-t5-small-caikit" ${KSVC_HOSTNAME}:443 caikit.runtime.Nlp.NlpService/TextGenerationTaskPredict + + ``` diff --git a/redhat/ocp4/4.13/files/ocp.ds.caikit.tgis.pod.yaml b/redhat/ocp4/4.13/files/ocp.ds.caikit.tgis.pod.yaml new file mode 100644 index 00000000..e381fe26 --- /dev/null +++ b/redhat/ocp4/4.13/files/ocp.ds.caikit.tgis.pod.yaml @@ -0,0 +1,1039 @@ +kind: Pod +apiVersion: v1 +metadata: + generateName: caikit-tgis-example-isvc-predictor-00001-deployment-dcbfddb- + annotations: + openshift.io/scc: restricted-v2 + internal.serving.kserve.io/storage-initializer-sourceuri: 's3://modelmesh-example-models/llm/models/flan-t5-small-caikit' + sidecar.istio.io/interceptionMode: REDIRECT + serving.knative.dev/creator: 'system:serviceaccount:redhat-ods-applications:kserve-controller-manager' + prometheus.io/port: '15020' + k8s.ovn.org/pod-networks: >- + {"default":{"ip_addresses":["10.133.0.59/23"],"mac_address":"0a:58:0a:85:00:3b","gateway_ips":["10.133.0.1"],"ip_address":"10.133.0.59/23","gateway_ip":"10.133.0.1"}} + k8s.v1.cni.cncf.io/networks: v2-4-istio-cni + prometheus.io/path: /stats/prometheus + traffic.sidecar.istio.io/excludeInboundPorts: '15090,8444,8022,15021' + sidecar.istio.io/inject: 'true' + serving.kserve.io/enable-prometheus-scraping: 'false' + sidecar.istio.io/status: >- + {"initContainers":null,"containers":["istio-proxy"],"volumes":["workload-socket","credential-socket","workload-certs","istio-envoy","istio-data","istio-podinfo","istio-token","istiod-ca-cert"],"imagePullSecrets":null,"revision":"minimal"} + traffic.sidecar.istio.io/includeOutboundIPRanges: '*' + serving.kserve.io/enable-metric-aggregation: 'false' + kubectl.kubernetes.io/default-logs-container: kserve-container + autoscaling.knative.dev/min-scale: '1' + seccomp.security.alpha.kubernetes.io/pod: runtime/default + kubectl.kubernetes.io/default-container: kserve-container + k8s.v1.cni.cncf.io/network-status: |- + [{ + "name": "ovn-kubernetes", + "interface": "eth0", + "ips": [ + "10.133.0.59" + ], + "mac": "0a:58:0a:85:00:3b", + "default": true, + "dns": {} + },{ + "name": "kserve-demo/v2-4-istio-cni", + "dns": {} + }] + traffic.sidecar.istio.io/includeInboundPorts: '*' + prometheus.io/scrape: 'true' + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + autoscaling.knative.dev/class: kpa.autoscaling.knative.dev + resourceVersion: '5641919' + name: caikit-tgis-example-isvc-predictor-00001-deployment-dcbfddrk2hv + uid: f905d861-f9e3-4596-936e-de5c931299f1 + creationTimestamp: '2023-11-19T13:39:03Z' + managedFields: + - manager: kube-controller-manager + operation: Update + apiVersion: v1 + time: '2023-11-19T13:39:03Z' + fieldsType: FieldsV1 + fieldsV1: + 'f:metadata': + 'f:annotations': + .: {} + 'f:autoscaling.knative.dev/class': {} + 'f:autoscaling.knative.dev/min-scale': {} + 'f:internal.serving.kserve.io/storage-initializer-sourceuri': {} + 'f:serving.knative.dev/creator': {} + 'f:sidecar.istio.io/inject': {} + 'f:sidecar.istio.io/rewriteAppHTTPProbers': {} + 'f:generateName': {} + 'f:labels': + 'f:service.istio.io/canonical-name': {} + 'f:pod-template-hash': {} + 'f:component': {} + 'f:app': {} + .: {} + 'f:serving.kserve.io/inferenceservice': {} + 'f:serving.knative.dev/configurationGeneration': {} + 'f:service.istio.io/canonical-revision': {} + 'f:serving.knative.dev/configurationUID': {} + 'f:serving.knative.dev/serviceUID': {} + 'f:serving.knative.dev/revision': {} + 'f:serving.knative.dev/service': {} + 'f:serving.knative.dev/revisionUID': {} + 'f:serving.knative.dev/configuration': {} + 'f:ownerReferences': + .: {} + 'k:{"uid":"d062c511-8daf-416f-b247-db730df3e2d1"}': {} + 'f:spec': + 'f:containers': + 'k:{"name":"kserve-container"}': + 'f:image': {} + 'f:terminationMessagePolicy': {} + .: {} + 'f:resources': {} + 'f:args': {} + 'f:lifecycle': + .: {} + 'f:preStop': + .: {} + 'f:httpGet': + .: {} + 'f:path': {} + 'f:port': {} + 'f:scheme': {} + 'f:command': {} + 'f:env': + .: {} + 'k:{"name":"K_CONFIGURATION"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"K_REVISION"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"K_SERVICE"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"TRANSFORMERS_CACHE"}': + .: {} + 'f:name': {} + 'f:value': {} + 'f:terminationMessagePath': {} + 'f:imagePullPolicy': {} + 'f:name': {} + 'k:{"name":"queue-proxy"}': + 'f:image': {} + 'f:terminationMessagePolicy': {} + .: {} + 'f:resources': + .: {} + 'f:requests': + .: {} + 'f:cpu': {} + 'f:env': + 'k:{"name":"SERVING_ENABLE_PROBE_REQUEST_LOG"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"REVISION_TIMEOUT_SECONDS"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"SERVING_LOGGING_LEVEL"}': + .: {} + 'f:name': {} + 'k:{"name":"METRICS_DOMAIN"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"SERVING_POD"}': + .: {} + 'f:name': {} + 'f:valueFrom': + .: {} + 'f:fieldRef': {} + 'k:{"name":"QUEUE_SERVING_PORT"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"USER_PORT"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"CONTAINER_CONCURRENCY"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"SERVING_REQUEST_METRICS_BACKEND"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"CONCURRENCY_STATE_TOKEN_PATH"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"TRACING_CONFIG_ZIPKIN_ENDPOINT"}': + .: {} + 'f:name': {} + 'k:{"name":"QUEUE_SERVING_TLS_PORT"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"REVISION_IDLE_TIMEOUT_SECONDS"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"ROOT_CA"}': + .: {} + 'f:name': {} + .: {} + 'k:{"name":"ENABLE_PROFILING"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"SERVING_ENABLE_REQUEST_LOG"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"SYSTEM_NAMESPACE"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"HOST_IP"}': + .: {} + 'f:name': {} + 'f:valueFrom': + .: {} + 'f:fieldRef': {} + 'k:{"name":"SERVING_REVISION"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"CONCURRENCY_STATE_ENDPOINT"}': + .: {} + 'f:name': {} + 'k:{"name":"SERVING_SERVICE"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"SERVING_CONFIGURATION"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"TRACING_CONFIG_DEBUG"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"ENABLE_HTTP2_AUTO_DETECTION"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"SERVING_POD_IP"}': + .: {} + 'f:name': {} + 'f:valueFrom': + .: {} + 'f:fieldRef': {} + 'k:{"name":"TRACING_CONFIG_BACKEND"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"METRICS_COLLECTOR_ADDRESS"}': + .: {} + 'f:name': {} + 'k:{"name":"SERVING_LOGGING_CONFIG"}': + .: {} + 'f:name': {} + 'k:{"name":"TRACING_CONFIG_SAMPLE_RATE"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"SERVING_NAMESPACE"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"SERVING_READINESS_PROBE"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"SERVING_REQUEST_LOG_TEMPLATE"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"REVISION_RESPONSE_START_TIMEOUT_SECONDS"}': + .: {} + 'f:name': {} + 'f:value': {} + 'f:readinessProbe': + .: {} + 'f:failureThreshold': {} + 'f:httpGet': + .: {} + 'f:httpHeaders': {} + 'f:path': {} + 'f:port': {} + 'f:scheme': {} + 'f:periodSeconds': {} + 'f:successThreshold': {} + 'f:timeoutSeconds': {} + 'f:securityContext': + .: {} + 'f:allowPrivilegeEscalation': {} + 'f:capabilities': + .: {} + 'f:drop': {} + 'f:readOnlyRootFilesystem': {} + 'f:runAsNonRoot': {} + 'f:terminationMessagePath': {} + 'f:imagePullPolicy': {} + 'f:ports': + .: {} + 'k:{"containerPort":8012,"protocol":"TCP"}': + .: {} + 'f:containerPort': {} + 'f:name': {} + 'f:protocol': {} + 'k:{"containerPort":8022,"protocol":"TCP"}': + .: {} + 'f:containerPort': {} + 'f:name': {} + 'f:protocol': {} + 'k:{"containerPort":8112,"protocol":"TCP"}': + .: {} + 'f:containerPort': {} + 'f:name': {} + 'f:protocol': {} + 'k:{"containerPort":9090,"protocol":"TCP"}': + .: {} + 'f:containerPort': {} + 'f:name': {} + 'f:protocol': {} + 'k:{"containerPort":9091,"protocol":"TCP"}': + .: {} + 'f:containerPort': {} + 'f:name': {} + 'f:protocol': {} + 'f:name': {} + 'k:{"name":"transformer-container"}': + 'f:image': {} + 'f:terminationMessagePolicy': {} + .: {} + 'f:resources': {} + 'f:lifecycle': + .: {} + 'f:preStop': + .: {} + 'f:httpGet': + .: {} + 'f:path': {} + 'f:port': {} + 'f:scheme': {} + 'f:env': + .: {} + 'k:{"name":"K_CONFIGURATION"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"K_REVISION"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"K_SERVICE"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"PORT"}': + .: {} + 'f:name': {} + 'f:value': {} + 'k:{"name":"RUNTIME_LOCAL_MODELS_DIR"}': + .: {} + 'f:name': {} + 'f:value': {} + 'f:terminationMessagePath': {} + 'f:imagePullPolicy': {} + 'f:ports': + .: {} + 'k:{"containerPort":8080,"protocol":"TCP"}': + .: {} + 'f:containerPort': {} + 'f:name': {} + 'f:protocol': {} + 'f:name': {} + 'f:dnsPolicy': {} + 'f:serviceAccount': {} + 'f:restartPolicy': {} + 'f:schedulerName': {} + 'f:terminationGracePeriodSeconds': {} + 'f:serviceAccountName': {} + 'f:enableServiceLinks': {} + 'f:securityContext': {} + - manager: master-01-demo + operation: Update + apiVersion: v1 + time: '2023-11-19T13:39:04Z' + fieldsType: FieldsV1 + fieldsV1: + 'f:metadata': + 'f:annotations': + 'f:k8s.ovn.org/pod-networks': {} + - manager: multus + operation: Update + apiVersion: v1 + time: '2023-11-19T13:39:04Z' + fieldsType: FieldsV1 + fieldsV1: + 'f:metadata': + 'f:annotations': + 'f:k8s.v1.cni.cncf.io/network-status': {} + subresource: status + - manager: kubelet + operation: Update + apiVersion: v1 + time: '2023-11-19T13:59:16Z' + fieldsType: FieldsV1 + fieldsV1: + 'f:status': + 'f:conditions': + 'k:{"type":"ContainersReady"}': + .: {} + 'f:lastProbeTime': {} + 'f:lastTransitionTime': {} + 'f:status': {} + 'f:type': {} + 'k:{"type":"Initialized"}': + .: {} + 'f:lastProbeTime': {} + 'f:lastTransitionTime': {} + 'f:status': {} + 'f:type': {} + 'k:{"type":"Ready"}': + .: {} + 'f:lastProbeTime': {} + 'f:lastTransitionTime': {} + 'f:status': {} + 'f:type': {} + 'f:containerStatuses': {} + 'f:hostIP': {} + 'f:initContainerStatuses': {} + 'f:phase': {} + 'f:podIP': {} + 'f:podIPs': + .: {} + 'k:{"ip":"10.133.0.59"}': + .: {} + 'f:ip': {} + 'f:startTime': {} + subresource: status + namespace: kserve-demo + ownerReferences: + - apiVersion: apps/v1 + kind: ReplicaSet + name: caikit-tgis-example-isvc-predictor-00001-deployment-dcbfddb + uid: d062c511-8daf-416f-b247-db730df3e2d1 + controller: true + blockOwnerDeletion: true + labels: + app: caikit-tgis-example-isvc-predictor-00001 + security.istio.io/tlsMode: istio + serving.knative.dev/configurationUID: f5863b71-d648-475e-8c46-42e344d92e43 + serving.knative.dev/serviceUID: fe138020-8241-471e-ab66-5221257730a2 + serving.knative.dev/revision: caikit-tgis-example-isvc-predictor-00001 + service.istio.io/canonical-revision: caikit-tgis-example-isvc-predictor-00001 + serving.knative.dev/configurationGeneration: '1' + serving.knative.dev/revisionUID: f3d2106a-278a-449c-aade-597cc75d6c70 + serving.knative.dev/service: caikit-tgis-example-isvc-predictor + serving.knative.dev/configuration: caikit-tgis-example-isvc-predictor + maistra-version: 2.4.5 + service.istio.io/canonical-name: caikit-tgis-example-isvc-predictor + pod-template-hash: dcbfddb + component: predictor + serving.kserve.io/inferenceservice: caikit-tgis-example-isvc +spec: + restartPolicy: Always + initContainers: + - resources: + limits: + cpu: '1' + memory: 1Gi + requests: + cpu: 100m + memory: 100Mi + terminationMessagePath: /dev/termination-log + name: storage-initializer + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: storage-config + key: AWS_ACCESS_KEY_ID + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: storage-config + key: AWS_SECRET_ACCESS_KEY + - name: S3_USE_HTTPS + value: '0' + - name: S3_ENDPOINT + value: 'minio.minio.svc:9000' + - name: AWS_ENDPOINT_URL + value: 'http://minio.minio.svc:9000' + - name: awsAnonymousCredential + value: 'false' + - name: AWS_DEFAULT_REGION + value: us-east-2 + securityContext: + capabilities: + drop: + - ALL + runAsUser: 1000760001 + runAsNonRoot: true + allowPrivilegeEscalation: false + imagePullPolicy: IfNotPresent + volumeMounts: + - name: kserve-provision-location + mountPath: /mnt/models + - name: kube-api-access-gqvp7 + readOnly: true + mountPath: /var/run/secrets/kubernetes.io/serviceaccount + terminationMessagePolicy: FallbackToLogsOnError + image: >- + quay.io/modh/kserve-storage-initializer@sha256:d5ded56dfe3fb1c927e18dfff9a8507c97fbb34572a57e4db14a7a94a417082c + args: + - 's3://modelmesh-example-models/llm/models/flan-t5-small-caikit' + - /mnt/models + serviceAccountName: sa + imagePullSecrets: + - name: sa-dockercfg-7lc8r + priority: 0 + schedulerName: default-scheduler + enableServiceLinks: false + terminationGracePeriodSeconds: 300 + preemptionPolicy: PreemptLowerPriority + nodeName: worker-01-demo + securityContext: + seLinuxOptions: + level: 's0:c28,c2' + fsGroup: 1000760000 + seccompProfile: + type: RuntimeDefault + containers: + - resources: {} + terminationMessagePath: /dev/termination-log + lifecycle: + preStop: + httpGet: + path: /wait-for-drain + port: 8022 + scheme: HTTP + name: kserve-container + command: + - text-generation-launcher + env: + - name: TRANSFORMERS_CACHE + value: /tmp/transformers_cache + - name: K_REVISION + value: caikit-tgis-example-isvc-predictor-00001 + - name: K_CONFIGURATION + value: caikit-tgis-example-isvc-predictor + - name: K_SERVICE + value: caikit-tgis-example-isvc-predictor + securityContext: + capabilities: + drop: + - ALL + runAsUser: 1000760000 + runAsNonRoot: true + allowPrivilegeEscalation: false + imagePullPolicy: IfNotPresent + volumeMounts: + - name: kube-api-access-gqvp7 + readOnly: true + mountPath: /var/run/secrets/kubernetes.io/serviceaccount + - name: kserve-provision-location + readOnly: true + mountPath: /mnt/models + terminationMessagePolicy: FallbackToLogsOnError + image: >- + quay.io/opendatahub/text-generation-inference@sha256:efcc60c671a5da62c0251bc543f6de73f6578aac40a9a745eae3f60b0208f9e9 + args: + - '--model-name=/mnt/models/artifacts/' + - resources: {} + terminationMessagePath: /dev/termination-log + lifecycle: + preStop: + httpGet: + path: /wait-for-drain + port: 8022 + scheme: HTTP + name: transformer-container + env: + - name: RUNTIME_LOCAL_MODELS_DIR + value: /mnt/models + - name: PORT + value: '8080' + - name: K_REVISION + value: caikit-tgis-example-isvc-predictor-00001 + - name: K_CONFIGURATION + value: caikit-tgis-example-isvc-predictor + - name: K_SERVICE + value: caikit-tgis-example-isvc-predictor + securityContext: + capabilities: + drop: + - ALL + runAsUser: 1000760000 + runAsNonRoot: true + allowPrivilegeEscalation: false + ports: + - name: user-port + containerPort: 8080 + protocol: TCP + imagePullPolicy: IfNotPresent + volumeMounts: + - name: kube-api-access-gqvp7 + readOnly: true + mountPath: /var/run/secrets/kubernetes.io/serviceaccount + - name: kserve-provision-location + readOnly: true + mountPath: /mnt/models + terminationMessagePolicy: FallbackToLogsOnError + image: >- + quay.io/opendatahub/caikit-tgis-serving@sha256:9d6e8bc10f3b61774f9d321790f8593f6ded5674b64a366248fcde6a07d66833 + - resources: + requests: + cpu: 25m + readinessProbe: + httpGet: + path: /app-health/queue-proxy/readyz + port: 15020 + scheme: HTTP + httpHeaders: + - name: K-Network-Probe + value: queue + timeoutSeconds: 1 + periodSeconds: 10 + successThreshold: 1 + failureThreshold: 3 + terminationMessagePath: /dev/termination-log + name: queue-proxy + env: + - name: SERVING_NAMESPACE + value: kserve-demo + - name: SERVING_SERVICE + value: caikit-tgis-example-isvc-predictor + - name: SERVING_CONFIGURATION + value: caikit-tgis-example-isvc-predictor + - name: SERVING_REVISION + value: caikit-tgis-example-isvc-predictor-00001 + - name: QUEUE_SERVING_PORT + value: '8012' + - name: QUEUE_SERVING_TLS_PORT + value: '8112' + - name: CONTAINER_CONCURRENCY + value: '0' + - name: REVISION_TIMEOUT_SECONDS + value: '300' + - name: REVISION_RESPONSE_START_TIMEOUT_SECONDS + value: '0' + - name: REVISION_IDLE_TIMEOUT_SECONDS + value: '0' + - name: SERVING_POD + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: SERVING_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: SERVING_LOGGING_CONFIG + - name: SERVING_LOGGING_LEVEL + - name: SERVING_REQUEST_LOG_TEMPLATE + value: >- + {"httpRequest": {"requestMethod": "{{.Request.Method}}", + "requestUrl": "{{js .Request.RequestURI}}", "requestSize": + "{{.Request.ContentLength}}", "status": {{.Response.Code}}, + "responseSize": "{{.Response.Size}}", "userAgent": "{{js + .Request.UserAgent}}", "remoteIp": "{{js .Request.RemoteAddr}}", + "serverIp": "{{.Revision.PodIP}}", "referer": "{{js + .Request.Referer}}", "latency": "{{.Response.Latency}}s", + "protocol": "{{.Request.Proto}}"}, "traceId": "{{index + .Request.Header "X-B3-Traceid"}}"} + - name: SERVING_ENABLE_REQUEST_LOG + value: 'false' + - name: SERVING_REQUEST_METRICS_BACKEND + value: prometheus + - name: TRACING_CONFIG_BACKEND + value: none + - name: TRACING_CONFIG_ZIPKIN_ENDPOINT + - name: TRACING_CONFIG_DEBUG + value: 'false' + - name: TRACING_CONFIG_SAMPLE_RATE + value: '0.1' + - name: USER_PORT + value: '8080' + - name: SYSTEM_NAMESPACE + value: knative-serving + - name: METRICS_DOMAIN + value: knative.dev/internal/serving + - name: SERVING_READINESS_PROBE + value: '{"tcpSocket":{"port":8080,"host":"127.0.0.1"},"successThreshold":1}' + - name: ENABLE_PROFILING + value: 'false' + - name: SERVING_ENABLE_PROBE_REQUEST_LOG + value: 'false' + - name: METRICS_COLLECTOR_ADDRESS + - name: CONCURRENCY_STATE_ENDPOINT + - name: CONCURRENCY_STATE_TOKEN_PATH + value: /var/run/secrets/tokens/state-token + - name: HOST_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + - name: ENABLE_HTTP2_AUTO_DETECTION + value: 'false' + - name: ROOT_CA + securityContext: + capabilities: + drop: + - ALL + runAsUser: 1000760000 + runAsNonRoot: true + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + ports: + - name: http-queueadm + containerPort: 8022 + protocol: TCP + - name: http-autometric + containerPort: 9090 + protocol: TCP + - name: http-usermetric + containerPort: 9091 + protocol: TCP + - name: queue-port + containerPort: 8012 + protocol: TCP + - name: https-port + containerPort: 8112 + protocol: TCP + imagePullPolicy: IfNotPresent + volumeMounts: + - name: kube-api-access-gqvp7 + readOnly: true + mountPath: /var/run/secrets/kubernetes.io/serviceaccount + terminationMessagePolicy: File + image: >- + registry.redhat.io/openshift-serverless-1/serving-queue-rhel8@sha256:ce4b36c90955a356a5774410356aafdac2b60795526a3488b0b76ac2ec1a018c + - resources: + limits: + cpu: '2' + memory: 1Gi + requests: + cpu: 10m + memory: 128Mi + readinessProbe: + httpGet: + path: /healthz/ready + port: 15021 + scheme: HTTP + initialDelaySeconds: 1 + timeoutSeconds: 3 + periodSeconds: 2 + successThreshold: 1 + failureThreshold: 30 + terminationMessagePath: /dev/termination-log + name: istio-proxy + env: + - name: JWT_POLICY + value: third-party-jwt + - name: PILOT_CERT_PROVIDER + value: istiod + - name: CA_ADDR + value: 'istiod-minimal.istio-system.svc:15012' + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: INSTANCE_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: SERVICE_ACCOUNT + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.serviceAccountName + - name: HOST_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + - name: PROXY_CONFIG + value: > + {"discoveryAddress":"istiod-minimal.istio-system.svc:15012","proxyMetadata":{"ISTIO_META_DNS_AUTO_ALLOCATE":"true","ISTIO_META_DNS_CAPTURE":"true","PROXY_XDS_VIA_AGENT":"true"},"terminationDrainDuration":"35s"} + - name: ISTIO_META_POD_PORTS + value: |- + [ + {"name":"user-port","containerPort":8080,"protocol":"TCP"} + ,{"name":"http-queueadm","containerPort":8022,"protocol":"TCP"} + ,{"name":"http-autometric","containerPort":9090,"protocol":"TCP"} + ,{"name":"http-usermetric","containerPort":9091,"protocol":"TCP"} + ,{"name":"queue-port","containerPort":8012,"protocol":"TCP"} + ,{"name":"https-port","containerPort":8112,"protocol":"TCP"} + ] + - name: ISTIO_META_APP_CONTAINERS + value: 'kserve-container,transformer-container,queue-proxy' + - name: ISTIO_META_CLUSTER_ID + value: Kubernetes + - name: ISTIO_META_INTERCEPTION_MODE + value: REDIRECT + - name: ISTIO_META_WORKLOAD_NAME + value: caikit-tgis-example-isvc-predictor-00001-deployment + - name: ISTIO_META_OWNER + value: >- + kubernetes://apis/apps/v1/namespaces/kserve-demo/deployments/caikit-tgis-example-isvc-predictor-00001-deployment + - name: ISTIO_META_MESH_ID + value: cluster.local + - name: TRUST_DOMAIN + value: cluster.local + - name: ISTIO_META_DNS_AUTO_ALLOCATE + value: 'true' + - name: ISTIO_META_DNS_CAPTURE + value: 'true' + - name: PROXY_XDS_VIA_AGENT + value: 'true' + - name: ISTIO_KUBE_APP_PROBERS + value: >- + {"/app-health/queue-proxy/readyz":{"httpGet":{"path":"/","port":8012,"scheme":"HTTP","httpHeaders":[{"name":"K-Network-Probe","value":"queue"}]},"timeoutSeconds":1}} + securityContext: + capabilities: + drop: + - ALL + - KILL + - MKNOD + - SETGID + - SETUID + privileged: false + runAsUser: 1000760001 + runAsGroup: 1000760001 + runAsNonRoot: true + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + ports: + - name: http-envoy-prom + containerPort: 15090 + protocol: TCP + imagePullPolicy: IfNotPresent + volumeMounts: + - name: workload-socket + mountPath: /var/run/secrets/workload-spiffe-uds + - name: credential-socket + mountPath: /var/run/secrets/credential-uds + - name: workload-certs + mountPath: /var/run/secrets/workload-spiffe-credentials + - name: istiod-ca-cert + mountPath: /var/run/secrets/istio + - name: istio-data + mountPath: /var/lib/istio/data + - name: istio-envoy + mountPath: /etc/istio/proxy + - name: istio-token + mountPath: /var/run/secrets/tokens + - name: istio-podinfo + mountPath: /etc/istio/pod + - name: kube-api-access-gqvp7 + readOnly: true + mountPath: /var/run/secrets/kubernetes.io/serviceaccount + terminationMessagePolicy: File + image: >- + registry.redhat.io/openshift-service-mesh/proxyv2-rhel8@sha256:bf693eaa845373e1a06041626a9314ec00519a6fd2437b12f7f4f3f104864039 + args: + - proxy + - sidecar + - '--domain' + - $(POD_NAMESPACE).svc.cluster.local + - '--proxyLogLevel=warning' + - '--proxyComponentLogLevel=misc:error' + - '--log_output_level=default:warn' + - '--concurrency' + - '2' + serviceAccount: sa + volumes: + - name: workload-socket + emptyDir: {} + - name: credential-socket + emptyDir: {} + - name: workload-certs + emptyDir: {} + - name: istio-envoy + emptyDir: + medium: Memory + - name: istio-data + emptyDir: {} + - name: istio-podinfo + downwardAPI: + items: + - path: labels + fieldRef: + apiVersion: v1 + fieldPath: metadata.labels + - path: annotations + fieldRef: + apiVersion: v1 + fieldPath: metadata.annotations + defaultMode: 420 + - name: istio-token + projected: + sources: + - serviceAccountToken: + audience: istio-ca + expirationSeconds: 43200 + path: istio-token + defaultMode: 420 + - name: istiod-ca-cert + configMap: + name: istio-ca-root-cert + defaultMode: 420 + - name: kube-api-access-gqvp7 + projected: + sources: + - serviceAccountToken: + expirationSeconds: 3607 + path: token + - configMap: + name: kube-root-ca.crt + items: + - key: ca.crt + path: ca.crt + - downwardAPI: + items: + - path: namespace + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - configMap: + name: openshift-service-ca.crt + items: + - key: service-ca.crt + path: service-ca.crt + defaultMode: 420 + - name: kserve-provision-location + emptyDir: {} + dnsPolicy: ClusterFirst + tolerations: + - key: node.kubernetes.io/not-ready + operator: Exists + effect: NoExecute + tolerationSeconds: 300 + - key: node.kubernetes.io/unreachable + operator: Exists + effect: NoExecute + tolerationSeconds: 300 + - key: node.kubernetes.io/memory-pressure + operator: Exists + effect: NoSchedule +status: + containerStatuses: + - restartCount: 0 + started: true + ready: true + name: istio-proxy + state: + running: + startedAt: '2023-11-19T13:59:14Z' + imageID: >- + registry.redhat.io/openshift-service-mesh/proxyv2-rhel8@sha256:380547be6794ee05f864be58e713507aae44dea8b1e3bf6ca06f5119d301449e + image: >- + registry.redhat.io/openshift-service-mesh/proxyv2-rhel8@sha256:bf693eaa845373e1a06041626a9314ec00519a6fd2437b12f7f4f3f104864039 + lastState: {} + containerID: 'cri-o://179ebd55c03844d313cc2953e297e4f8d6c54e229a9d6282f89895896ccaefe1' + - restartCount: 0 + started: true + ready: true + name: kserve-container + state: + running: + startedAt: '2023-11-19T13:51:45Z' + imageID: >- + quay.io/opendatahub/text-generation-inference@sha256:efcc60c671a5da62c0251bc543f6de73f6578aac40a9a745eae3f60b0208f9e9 + image: >- + quay.io/opendatahub/text-generation-inference@sha256:efcc60c671a5da62c0251bc543f6de73f6578aac40a9a745eae3f60b0208f9e9 + lastState: {} + containerID: 'cri-o://20d9065b6bf962f9acb884379aca11b6be4312a31a931da3e4880dda4c21cb48' + - restartCount: 0 + started: true + ready: true + name: queue-proxy + state: + running: + startedAt: '2023-11-19T13:59:14Z' + imageID: >- + registry.redhat.io/openshift-serverless-1/serving-queue-rhel8@sha256:b209988ea641879624e144d51fba2a8de8592a72603d626952419c6ab5f7377e + image: >- + registry.redhat.io/openshift-serverless-1/serving-queue-rhel8@sha256:ce4b36c90955a356a5774410356aafdac2b60795526a3488b0b76ac2ec1a018c + lastState: {} + containerID: 'cri-o://673da36f3f265adb2a5bf0d5cf686070ab3274e2aaa0cd77dbd9d4efe50ba682' + - restartCount: 0 + started: true + ready: true + name: transformer-container + state: + running: + startedAt: '2023-11-19T13:58:57Z' + imageID: >- + quay.io/opendatahub/caikit-tgis-serving@sha256:9d6e8bc10f3b61774f9d321790f8593f6ded5674b64a366248fcde6a07d66833 + image: >- + quay.io/opendatahub/caikit-tgis-serving@sha256:9d6e8bc10f3b61774f9d321790f8593f6ded5674b64a366248fcde6a07d66833 + lastState: {} + containerID: 'cri-o://0baa2d8403b20b0546158a09fd7aa514607dfe7e930990d52b29e5c40db3db59' + qosClass: Burstable + podIPs: + - ip: 10.133.0.59 + podIP: 10.133.0.59 + hostIP: 172.21.6.26 + startTime: '2023-11-19T13:39:04Z' + initContainerStatuses: + - name: storage-initializer + state: + terminated: + exitCode: 0 + reason: Completed + startedAt: '2023-11-19T13:39:05Z' + finishedAt: '2023-11-19T13:39:10Z' + containerID: >- + cri-o://8d7854bb400c213936376c35f4c941c3fdd9f847cce28ff48908920c7ec2a450 + lastState: {} + ready: true + restartCount: 0 + image: >- + quay.io/modh/kserve-storage-initializer@sha256:d5ded56dfe3fb1c927e18dfff9a8507c97fbb34572a57e4db14a7a94a417082c + imageID: >- + quay.io/modh/kserve-storage-initializer@sha256:d5ded56dfe3fb1c927e18dfff9a8507c97fbb34572a57e4db14a7a94a417082c + containerID: 'cri-o://8d7854bb400c213936376c35f4c941c3fdd9f847cce28ff48908920c7ec2a450' + conditions: + - type: Initialized + status: 'True' + lastProbeTime: null + lastTransitionTime: '2023-11-19T13:39:11Z' + - type: Ready + status: 'True' + lastProbeTime: null + lastTransitionTime: '2023-11-19T13:59:16Z' + - type: ContainersReady + status: 'True' + lastProbeTime: null + lastTransitionTime: '2023-11-19T13:59:16Z' + - type: PodScheduled + status: 'True' + lastProbeTime: null + lastTransitionTime: '2023-11-19T13:39:03Z' + phase: Running