Skip to content

Commit

Permalink
llm
Browse files Browse the repository at this point in the history
  • Loading branch information
wangzheng422 committed Nov 19, 2023
1 parent 24bb944 commit 2087656
Show file tree
Hide file tree
Showing 2 changed files with 1,230 additions and 0 deletions.
191 changes: 191 additions & 0 deletions redhat/ocp4/4.13/4.13.gpu.llm.md
Original file line number Diff line number Diff line change
Expand Up @@ -1217,6 +1217,197 @@ oc create -f custom-manifests/opendatahub/kserve-dsc.yaml

# modelmeshserving is set to Removed; it cannot be activated, reason unknown.

```

### deploy model

- https://github.com/wangzheng422/caikit-tgis-serving/blob/main/demo/kserve/deploy-remove.md

```bash

# On the helper: enter the KServe demo directory of the caikit-tgis-serving
# checkout (expected under ~/tmp).
cd ~/tmp

cd caikit-tgis-serving/demo/kserve

# Operator flavor handed to getOpType below.
export TARGET_OPERATOR=rhods

# getOpType / getOpNS / getKserveNS are not defined in this document;
# presumably they come from these two sourced scripts.
source ./scripts/env.sh
source ./scripts/utils.sh

# Assign first, export afterwards: `export VAR=$(cmd)` always reports the
# status of `export`, which hides a failing helper call (ShellCheck SC2155).
TARGET_OPERATOR_TYPE=$(getOpType "${TARGET_OPERATOR}")
TARGET_OPERATOR_NS=$(getOpNS)
KSERVE_OPERATOR_NS=$(getKserveNS)
export TARGET_OPERATOR_TYPE TARGET_OPERATOR_NS KSERVE_OPERATOR_NS

# Deploy the MinIO image that contains the LLM model.

# NOTE(review): ACCESS_KEY_ID and SECRET_ACCESS_KEY are not referenced by the
# commands shown here -- confirm whether the manifests still need them.
ACCESS_KEY_ID=admin
SECRET_ACCESS_KEY=password
MINIO_NS=minio

oc new-project "${MINIO_NS}"
oc apply -f ./custom-manifests/minio/minio.yaml -n "${MINIO_NS}"

# Fill in the <minio_ns> placeholder, keep a rendered copy (*-current.yaml)
# in the working directory, and apply the result to the MinIO namespace.
# The rendered copies are applied again to the model namespace further down.
sed "s/<minio_ns>/${MINIO_NS}/g" ./custom-manifests/minio/minio-secret.yaml \
  | tee ./minio-secret-current.yaml \
  | oc -n "${MINIO_NS}" apply -f -
sed "s/<minio_ns>/${MINIO_NS}/g" ./custom-manifests/minio/serviceaccount-minio.yaml \
  | tee ./serviceaccount-minio-current.yaml \
  | oc -n "${MINIO_NS}" apply -f -

# Endpoint: http://10.133.0.36:9000 http://127.0.0.1:9000
# Browser Access:
# http://10.133.0.36:9000 http://127.0.0.1:9000
# Object API (Amazon S3 compatible):
# Go: https://docs.min.io/docs/golang-client-quickstart-guide
# Java: https://docs.min.io/docs/java-client-quickstart-guide
# Python: https://docs.min.io/docs/python-client-quickstart-guide
# JavaScript: https://docs.min.io/docs/javascript-client-quickstart-guide
# .NET: https://docs.min.io/docs/dotnet-client-quickstart-guide
# IAM initialization complete


# Deploy the LLM model with Caikit+TGIS Serving runtime
export TEST_NS=kserve-demo
oc new-project "${TEST_NS}"

oc apply -f ./custom-manifests/caikit/caikit-tgis-servingruntime.yaml -n "${TEST_NS}"

# up to this step, no pod has been created yet

# Re-use the manifests rendered during the MinIO deployment in this namespace.
oc apply -f ./minio-secret-current.yaml -n "${TEST_NS}"
oc create -f ./serviceaccount-minio-current.yaml -n "${TEST_NS}"

# change the url to s3://minio.minio.svc.cluster.local:9000/modelmesh-example-models/llm/models/flan-t5-small-caikit
# NOTE(review): the `oc get isvc -o yaml` dump further down shows
# storageUri: s3://modelmesh-example-models/llm/models/flan-t5-small-caikit
# (no host part) -- confirm which value must be put into caikit-tgis-isvc.yaml.
oc apply -f ./custom-manifests/caikit/caikit-tgis-isvc.yaml -n "${TEST_NS}"


# Error from server (storageUri, must be one of: [gs://, s3://, pvc://, file://, https://, http://, hdfs://, webhdfs://] or match https://{}.blob.core.windows.net/{}/{} or be an absolute or relative local path. StorageUri [proto://path/to/model] is not supported.): error when creating "./custom-manifests/caikit/caikit-tgis-isvc.yaml": admission webhook "inferenceservice.kserve-webhook-server.validator" denied the request: storageUri, must be one of: [gs://, s3://, pvc://, file://, https://, http://, hdfs://, webhdfs://] or match https://{}.blob.core.windows.net/{}/{} or be an absolute or relative local path. StorageUri [proto://path/to/model] is not supported.

# oc delete -f ./custom-manifests/caikit/caikit-tgis-isvc.yaml -n ${TEST_NS}


# at this step, the pod has been created.

# Use ${TEST_NS} (exported above as kserve-demo) instead of a hard-coded
# namespace, so this command follows the same variable as the other oc calls.
oc get pod -n "${TEST_NS}"
# NAME READY STATUS RESTARTS AGE
# caikit-tgis-example-isvc-predictor-00001-deployment-dcbfddrk2hv 4/4 Running 0 30m

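# this pod has 4 containers
# kserve-container is the container that actually runs the LLM; its image is text-generation-inference. It can see all the GPUs on the host, but strangely the yaml does not grant it any such permission -- to be investigated later. This container exposes gRPC and HTTP services, but does not declare them as services to the outside.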
# log end with
# Shard 0: Server started at unix:///tmp/text-generation-server-0
# 2023-11-19T13:51:48.465888Z INFO text_generation_launcher: Shard 0 ready in 3.107531075s
# 2023-11-19T13:51:48.565905Z INFO text_generation_launcher: Starting Router
# 2023-11-19T13:51:48.606326Z INFO text_generation_router: Token decoder: Some(Metaspace(Metaspace { replacement: '▁', add_prefix_space: true, str_rep: "▁" }))
# 2023-11-19T13:51:48.615392Z INFO text_generation_router: Connected
# 2023-11-19T13:51:48.615840Z INFO text_generation_router::server: Shard model info: is_seq2seq = true, eos_token_id = 1, use_padding = true
# 2023-11-19T13:51:48.662055Z INFO text_generation_router::grpc_server: gRPC server started on port 8033
# 2023-11-19T13:51:50.663300Z INFO text_generation_router::server: HTTP server started on port 3000


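# transformer-container runs the caikit-tgis-serving image; its role is presumably to provide the gRPC service and then call kserve-container. This container exposes the user-http service and does declare it to the outside. Also, this container presumably cannot see the GPUs.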
# log end with : "Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit)"

# the role of transformer-container still needs further study.

# Summary view of the InferenceService (sample output below).
oc get isvc/caikit-tgis-example-isvc -n "${TEST_NS}"
# NAME URL READY PREV LATEST PREVROLLEDOUTREVISION LATESTREADYREVISION AGE
# caikit-tgis-example-isvc https://caikit-tgis-example-isvc-predictor-kserve-demo.apps.demo-gpu.wzhlab.top True 100 caikit-tgis-example-isvc-predictor-00001 79m

# Full object as YAML, including spec and status (sample output below).
oc get isvc/caikit-tgis-example-isvc -n "${TEST_NS}" -o yaml
# apiVersion: serving.kserve.io/v1beta1
# kind: InferenceService
# metadata:
# annotations:
# kubectl.kubernetes.io/last-applied-configuration: |
# {"apiVersion":"serving.kserve.io/v1beta1","kind":"InferenceService","metadata":{"annotations":{"serving.knative.openshift.io/enablePassthrough":"true","sidecar.istio
# .io/inject":"true","sidecar.istio.io/rewriteAppHTTPProbers":"true"},"name":"caikit-tgis-example-isvc","namespace":"kserve-demo"},"spec":{"predictor":{"model":{"modelFormat
# ":{"name":"caikit"},"runtime":"caikit-tgis-runtime","storageUri":"s3://modelmesh-example-models/llm/models/flan-t5-small-caikit"},"serviceAccountName":"sa"}}}
# serving.knative.openshift.io/enablePassthrough: "true"
# sidecar.istio.io/inject: "true"
# sidecar.istio.io/rewriteAppHTTPProbers: "true"
# creationTimestamp: "2023-11-19T13:38:56Z"
# finalizers:
# - inferenceservice.finalizers
# generation: 1
# name: caikit-tgis-example-isvc
# namespace: kserve-demo
# resourceVersion: "5641996"
# uid: 3683461f-0976-4dcd-8ec2-d5eabec6e3af
# spec:
# predictor:
# model:
# modelFormat:
# name: caikit
# name: ""
# resources: {}
# runtime: caikit-tgis-runtime
# storageUri: s3://modelmesh-example-models/llm/models/flan-t5-small-caikit
# serviceAccountName: sa
# status:
# address:
# url: http://caikit-tgis-example-isvc-predictor.kserve-demo.svc.cluster.local
# components:
# predictor:
# address:
# url: http://caikit-tgis-example-isvc-predictor.kserve-demo.svc.cluster.local
# latestCreatedRevision: caikit-tgis-example-isvc-predictor-00001
# latestReadyRevision: caikit-tgis-example-isvc-predictor-00001
# latestRolledoutRevision: caikit-tgis-example-isvc-predictor-00001
# traffic:
# - latestRevision: true
# percent: 100
# revisionName: caikit-tgis-example-isvc-predictor-00001
# url: https://caikit-tgis-example-isvc-predictor-kserve-demo.apps.demo-gpu.wzhlab.top
# conditions:
# - lastTransitionTime: "2023-11-19T13:59:17Z"
# status: "True"
# type: IngressReady
# - lastTransitionTime: "2023-11-19T13:59:16Z"
# severity: Info
# status: "True"
# type: LatestDeploymentReady
# - lastTransitionTime: "2023-11-19T13:59:16Z"
# severity: Info
# status: "True"
# type: PredictorConfigurationReady
# - lastTransitionTime: "2023-11-19T13:59:16Z"
# status: "True"
# type: PredictorReady
# - lastTransitionTime: "2023-11-19T13:59:16Z"
# severity: Info
# status: "True"
# type: PredictorRouteReady
# - lastTransitionTime: "2023-11-19T13:59:17Z"
# status: "True"
# type: Ready
# - lastTransitionTime: "2023-11-19T13:59:16Z"
# severity: Info
# status: "True"
# type: RoutesReady
# modelStatus:
# copies:
# failedCopies: 0
# totalCopies: 1
# states:
# activeModelState: Loaded
# targetModelState: Loaded
# transitionStatus: UpToDate
# observedGeneration: 1
# url: https://caikit-tgis-example-isvc-predictor-kserve-demo.apps.demo-gpu.wzhlab.top


# Perform inference with gRPC (Remote Procedure Call) commands.

# Look for the HTTP/2 annotation on the cluster ingress config.
# -F matches the annotation key literally (its dots are not regex wildcards).
oc get ingresses.config/cluster -ojson | grep -F ingress.operator.openshift.io/default-enable-http2
# empty return

oc annotate ingresses.config/cluster ingress.operator.openshift.io/default-enable-http2=true

# https://github.com/fullstorydev/grpcurl/releases
# -f makes curl fail on an HTTP error instead of piping an error page into tar.
curl -fsSL "https://github.com/fullstorydev/grpcurl/releases/download/v1.8.7/grpcurl_1.8.7_linux_x86_64.tar.gz" | sudo tar -xz -C /usr/local/bin


# Keep only the host part of the Knative service URL: in scheme://host/...
# the host is the third '/'-separated field.
# Assigned and exported separately (ShellCheck SC2155).
KSVC_HOSTNAME=$(oc get ksvc caikit-tgis-example-isvc-predictor -n "${TEST_NS}" -o jsonpath='{.status.url}' | cut -d'/' -f3)
export KSVC_HOSTNAME
# export KSVC_HOSTNAME=$(oc get ksvc caikit-tgis-example-isvc-predictor -n ${TEST_NS} -o jsonpath='{.status.url}')
printf '%s\n' "${KSVC_HOSTNAME}"

# Call the text-generation RPC on port 443 of the route.
# NOTE(review): -insecure presumably disables TLS certificate verification --
# acceptable for a lab cluster, confirm before using elsewhere.
grpcurl -vv -insecure -d '{"text": "At what temperature does liquid Nitrogen boil?"}' -H "mm-model-id: flan-t5-small-caikit" "${KSVC_HOSTNAME}:443" caikit.runtime.Nlp.NlpService/TextGenerationTaskPredict




```
Expand Down
Loading

0 comments on commit 2087656

Please sign in to comment.