Fix #11 - Revert model comparison to Flan-T5 (#26)
* Revert to Flan-T5

Remove CUDA from the container image

Update packages and switch to CPU-only torch

Update the workbench ImageStream

Update the LLM deployment

Switch back to text-generation-inference 1.3

Update the workbench image

* Update conclusion
guimou authored Apr 15, 2024
1 parent 67fa1df commit 52ca619
Showing 16 changed files with 1,311 additions and 1,354 deletions.
2 changes: 1 addition & 1 deletion bootstrap/ic-rhoai-configuration/images-puller.yaml
@@ -19,7 +19,7 @@ spec:
spec:
containers:
- name: ic-workbench
-        image: image-registry.openshift-image-registry.svc:5000/redhat-ods-applications/ic-workbench:2.0.1
+        image: image-registry.openshift-image-registry.svc:5000/redhat-ods-applications/ic-workbench:2.1.0
command: ["tail"]
args: ["-f", "/dev/null"]
resources:
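The puller container simply runs tail -f /dev/null so each node pulls and caches the workbench image ahead of the lab. To spot-check that the pods now reference the 2.1.0 tag, something along these lines should work (a generic pod listing, not a command from this repo):

  oc get pods -n redhat-ods-applications \
    -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[*].image}{"\n"}{end}' | grep ic-workbench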
10 changes: 5 additions & 5 deletions bootstrap/ic-rhoai-configuration/workbench-imagestream.yaml
@@ -9,7 +9,7 @@ metadata:
internal.config.kubernetes.io/previousKinds: ImageStream
opendatahub.io/notebook-image-name: CUSTOM - Insurance Claim Processing Lab Workbench
internal.config.kubernetes.io/previousNames: CUSTOM - Insurance Claim Processing Lab Workbench
-    opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
+    opendatahub.io/recommended-accelerators: ''
opendatahub.io/notebook-image-desc: >-
Jupyter notebook image with all the libraries needed for the OpenShift AI Insurance Claim Lab.
argocd.argoproj.io/sync-wave: "1"
@@ -25,17 +25,17 @@ spec:
lookupPolicy:
local: true
tags:
-  - name: '2.0.1'
+  - name: '2.1.0'
annotations:
opendatahub.io/notebook-python-dependencies: >-
[{"name":"PyTorch","version":"2.1.2"},{"name":"Langchain","version":"0.1.12"},{"name":"Ultralytics","version":"8.1.29"},]
[{"name":"PyTorch","version":"2.2.2"},{"name":"Langchain","version":"0.1.12"},{"name":"Ultralytics","version":"8.1.47"},]
opendatahub.io/notebook-software: >-
[{"name":"CUDA","version":"12.1"},{"name":"Python","version":"v3.11"}]
[{"name":"Python","version":"v3.11"}]
openshift.io/imported-from: quay.io/rh-aiservices-bu/rhoai-lab-insurance-claim-workbench
from:
kind: DockerImage
name: >-
-        quay.io/rh-aiservices-bu/rhoai-lab-insurance-claim-workbench:2.0.1
+        quay.io/rh-aiservices-bu/rhoai-lab-insurance-claim-workbench:2.1.0
importPolicy:
importMode: Legacy
referencePolicy:
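Once Argo CD syncs the ImageStream, the imported tag can be confirmed from the CLI; a minimal check, assuming the usual oc access to the cluster:

  # should print the tag list, now including 2.1.0
  oc get imagestream ic-workbench -n redhat-ods-applications -o jsonpath='{.spec.tags[*].name}'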
107 changes: 107 additions & 0 deletions bootstrap/ic-shared-llm/deployment-hftgi.yaml
@@ -0,0 +1,107 @@
kind: Deployment
apiVersion: apps/v1
metadata:
name: llm-flant5
namespace: ic-shared-llm
labels:
app: llm-flant5
spec:
replicas: 1
selector:
matchLabels:
app: llm-flant5
template:
metadata:
creationTimestamp: null
labels:
app: llm-flant5
spec:
restartPolicy: Always
schedulerName: default-scheduler
affinity: {}
terminationGracePeriodSeconds: 120
securityContext: {}
containers:
- resources:
limits:
cpu: '4'
memory: 8Gi
requests:
cpu: '2'
memory: 6Gi
readinessProbe:
httpGet:
path: /health
port: http
scheme: HTTP
timeoutSeconds: 5
periodSeconds: 30
successThreshold: 1
failureThreshold: 3
terminationMessagePath: /dev/termination-log
name: server
livenessProbe:
httpGet:
path: /health
port: http
scheme: HTTP
timeoutSeconds: 8
periodSeconds: 100
successThreshold: 1
failureThreshold: 3
env:
- name: MODEL_ID
value: google/flan-t5-large
- name: MAX_INPUT_LENGTH
value: '416'
- name: MAX_TOTAL_TOKENS
value: '512'
- name: HUGGINGFACE_HUB_CACHE
value: /models-cache
- name: PORT
value: '3000'
- name: HOSTNAME
value: '0.0.0.0'
securityContext:
capabilities:
drop:
- ALL
runAsNonRoot: true
allowPrivilegeEscalation: false
seccompProfile:
type: RuntimeDefault
ports:
- name: http
containerPort: 3000
protocol: TCP
imagePullPolicy: IfNotPresent
startupProbe:
httpGet:
path: /health
port: http
scheme: HTTP
timeoutSeconds: 1
periodSeconds: 30
successThreshold: 1
failureThreshold: 24
initialDelaySeconds: 60
volumeMounts:
- name: models-cache
mountPath: /models-cache
- name: shm
mountPath: /dev/shm
terminationMessagePolicy: File
image: 'ghcr.io/huggingface/text-generation-inference:1.3'
volumes:
- name: models-cache
persistentVolumeClaim:
claimName: models-cache-hftgi
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
dnsPolicy: ClusterFirst
strategy:
type: Recreate
revisionHistoryLimit: 10
progressDeadlineSeconds: 600
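With MAX_INPUT_LENGTH=416 and MAX_TOTAL_TOKENS=512, each request can generate at most 512 - 416 = 96 new tokens, and the startup probe leaves 60s plus up to 24 × 30s, roughly 13 minutes, for the first model download before the pod is restarted. Text Generation Inference serves a /generate endpoint, so a smoke test from another pod in the cluster could look like this (the prompt is illustrative):

  curl -s http://llm-flant5.ic-shared-llm.svc.cluster.local:3000/generate \
    -H 'Content-Type: application/json' \
    -d '{"inputs": "Summarize: the claim was filed on March 3rd.", "parameters": {"max_new_tokens": 96}}'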
81 changes: 0 additions & 81 deletions bootstrap/ic-shared-llm/deployment-ollama.yaml

This file was deleted.

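With the Ollama Deployment deleted, a quick grep is a cheap way to confirm nothing left in the bootstrap tree still references it; a simple sanity check:

  grep -ri ollama bootstrap/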
6 changes: 3 additions & 3 deletions bootstrap/ic-shared-llm/kustomization.yaml
@@ -11,9 +11,9 @@ resources:
- fix-odf-config.yaml
# wave 1
- pvc.yaml
-- pvc-ollama.yaml
+- pvc-hftgi.yaml
- deployment.yaml
- service.yaml
-- deployment-ollama.yaml
-- service-ollama.yaml
+- deployment-hftgi.yaml
+- service-hftgi.yaml
# wave 2
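The kustomization is what ties the swap together: the Ollama PVC, Deployment, and Service drop out and the HF TGI equivalents come in under the same sync wave. Argo CD applies this automatically; the equivalent manual command, if applying the overlay by hand, would be:

  oc apply -k bootstrap/ic-shared-llm/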
@@ -2,10 +2,10 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
-  name: ollama-cache
+  name: models-cache-hftgi
namespace: ic-shared-llm
labels:
-    app: ollama
+    app: ic-shared-llm
annotations:
argocd.argoproj.io/sync-wave: "0"
spec:
@@ -1,21 +1,21 @@
kind: Service
apiVersion: v1
metadata:
-  name: ollama
+  name: llm-flant5
namespace: ic-shared-llm
labels:
-    app: ollama
+    app: llm-flant5
spec:
clusterIP: None
ipFamilies:
- IPv4
ports:
- name: http
protocol: TCP
-      port: 11434
+      port: 3000
targetPort: http
type: ClusterIP
ipFamilyPolicy: SingleStack
sessionAffinity: None
selector:
-    app: ollama
+    app: llm-flant5
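Since the Service is headless (clusterIP: None), in-cluster clients reach the pod through the service DNS name llm-flant5.ic-shared-llm.svc.cluster.local on port 3000. For a quick check from outside the cluster, a port-forward against the Deployment sidesteps DNS; a minimal sketch:

  oc port-forward -n ic-shared-llm deploy/llm-flant5 3000:3000 &
  curl -s http://localhost:3000/health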
@@ -33,7 +33,7 @@ spec:
# Define some variables
WORKBENCH_NAME="my-workbench"
WORKBENCH_IMAGE="ic-workbench:2.0.1"
WORKBENCH_IMAGE="ic-workbench:2.1.0"
PIPELINE_ENGINE="Tekton"
for i in $(seq 1 $user_count);
@@ -10,7 +10,7 @@ DASHBOARD_ROUTE=https://$(oc get route rhods-dashboard -n redhat-ods-application

# Define some variables
WORKBENCH_NAME="my-workbench"
WORKBENCH_IMAGE="ic-workbench:2.0.1"
WORKBENCH_IMAGE="ic-workbench:2.1.0"
PIPELINE_ENGINE="Tekton"

for i in $(seq 1 $user_count);
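Both scripts stamp out one workbench per lab user from these variables. The loop body is not shown in the hunk, but the pattern presumably expands to something like the following (the user naming is an assumption, not taken from the script):

  for i in $(seq 1 $user_count); do
    echo "creating ${WORKBENCH_NAME} for user${i} from image ${WORKBENCH_IMAGE}"
  done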
79 changes: 0 additions & 79 deletions bootstrap/workbench-image/Containerfile
@@ -25,85 +25,6 @@ RUN echo "tsflags=nodocs" | tee -a /etc/yum.conf && \
# End of OS Packages #
#############################################

###################################################################################################
# CUDA 12.1 Layer, from https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/12.1.1 #
###################################################################################################

USER 0

ENV NVARCH x86_64
ENV NVIDIA_REQUIRE_CUDA "cuda>=12.1 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 brand=tesla,driver>=525,driver<526 brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526"
ENV NV_CUDA_CUDART_VERSION 12.1.105-1

COPY cuda.repo-x86_64 /etc/yum.repos.d/cuda.repo

RUN NVIDIA_GPGKEY_SUM=d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87 && \
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel9/${NVARCH}/D42D0685.pub | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict -

ENV CUDA_VERSION 12.1.1

# For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a
RUN yum upgrade -y && yum install -y \
cuda-cudart-12-1-${NV_CUDA_CUDART_VERSION} \
cuda-compat-12-1 \
&& ln -s cuda-12.1 /usr/local/cuda \
&& yum -y clean all --enablerepo='*' && \
rm -rf /var/cache/dnf && \
find /var/log -type f -name "*.log" -exec rm -f {} \;

# nvidia-docker 1.0
RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf

ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64

COPY NGC-DL-CONTAINER-LICENSE /

# nvidia-container-runtime
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility

ENV NV_CUDA_LIB_VERSION 12.1.1-1

ENV NV_NVTX_VERSION 12.1.105-1
ENV NV_LIBNPP_VERSION 12.1.0.40-1
ENV NV_LIBNPP_PACKAGE libnpp-12-1-${NV_LIBNPP_VERSION}
ENV NV_LIBCUBLAS_VERSION 12.1.3.1-1
ENV NV_LIBNCCL_PACKAGE_NAME libnccl
ENV NV_LIBNCCL_PACKAGE_VERSION 2.17.1-1
ENV NV_LIBNCCL_VERSION 2.17.1
ENV NCCL_VERSION 2.17.1
ENV NV_LIBNCCL_PACKAGE ${NV_LIBNCCL_PACKAGE_NAME}-${NV_LIBNCCL_PACKAGE_VERSION}+cuda12.1

RUN yum install -y \
cuda-libraries-12-1-${NV_CUDA_LIB_VERSION} \
cuda-nvtx-12-1-${NV_NVTX_VERSION} \
${NV_LIBNPP_PACKAGE} \
libcublas-12-1-${NV_LIBCUBLAS_VERSION} \
${NV_LIBNCCL_PACKAGE} \
&& yum -y clean all --enablerepo='*' && \
rm -rf /var/cache/dnf && \
find /var/log -type f -name "*.log" -exec rm -f {} \;

# Set this flag so that libraries can find the location of CUDA
ENV XLA_FLAGS=--xla_gpu_cuda_data_dir=/usr/local/cuda

# CuDNN
ENV NV_CUDNN_VERSION 8.9.0.131-1
ENV NV_CUDNN_PACKAGE libcudnn8-${NV_CUDNN_VERSION}.cuda12.1

RUN yum install -y \
${NV_CUDNN_PACKAGE} \
&& yum -y clean all --enablerepo='*' && \
rm -rf /var/cache/dnf && \
find /var/log -type f -name "*.log" -exec rm -f {} \;

#############################################
# End of CUDA 12.1 Layer #
#############################################

######################################
# Deploy Python packages and Jupyter #
######################################
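With the CUDA layer removed, the "switch to CPU torch" from the commit message happens at the pip level, since the default PyPI wheel pulls in CUDA libraries. The usual way to pin a CPU-only build, shown here for the 2.2.2 version listed in the ImageStream (the actual requirements line for this image is not part of the hunk):

  pip install torch==2.2.2 --index-url https://download.pytorch.org/whl/cpu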
