Fix #11 - Revert model comparison to Flan-T5 (#26)
* Revert to Flan-T5

Remove CUDA from the container image

Update packages and switch to CPU-only torch

Update the workbench ImageStream

Update the LLM deployment

Switch back to text-generation-inference 1.3

Update the workbench image

* Update conclusion
guimou authored Apr 15, 2024
1 parent 67fa1df commit 52ca619
Showing 16 changed files with 1,311 additions and 1,354 deletions.
2 changes: 1 addition & 1 deletion bootstrap/ic-rhoai-configuration/images-puller.yaml
@@ -19,7 +19,7 @@ spec:
spec:
containers:
- name: ic-workbench
-        image: image-registry.openshift-image-registry.svc:5000/redhat-ods-applications/ic-workbench:2.0.1
+        image: image-registry.openshift-image-registry.svc:5000/redhat-ods-applications/ic-workbench:2.1.0
command: ["tail"]
args: ["-f", "/dev/null"]
resources:
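The puller container simply runs tail -f /dev/null so each node pulls and caches the workbench image ahead of the lab. To spot-check that the pods now reference the 2.1.0 tag, something along these lines should work (a generic pod listing, not a command from this repo):

  oc get pods -n redhat-ods-applications \
    -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[*].image}{"\n"}{end}' | grep ic-workbench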
10 changes: 5 additions & 5 deletions bootstrap/ic-rhoai-configuration/workbench-imagestream.yaml
@@ -9,7 +9,7 @@ metadata:
internal.config.kubernetes.io/previousKinds: ImageStream
opendatahub.io/notebook-image-name: CUSTOM - Insurance Claim Processing Lab Workbench
internal.config.kubernetes.io/previousNames: CUSTOM - Insurance Claim Processing Lab Workbench
-    opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
+    opendatahub.io/recommended-accelerators: ''
opendatahub.io/notebook-image-desc: >-
Jupyter notebook image with all the libraries needed for the OpenShift AI Insurance Claim Lab.
argocd.argoproj.io/sync-wave: "1"
@@ -25,17 +25,17 @@ spec:
lookupPolicy:
local: true
tags:
-  - name: '2.0.1'
+  - name: '2.1.0'
annotations:
opendatahub.io/notebook-python-dependencies: >-
[{"name":"PyTorch","version":"2.1.2"},{"name":"Langchain","version":"0.1.12"},{"name":"Ultralytics","version":"8.1.29"},]
[{"name":"PyTorch","version":"2.2.2"},{"name":"Langchain","version":"0.1.12"},{"name":"Ultralytics","version":"8.1.47"},]
opendatahub.io/notebook-software: >-
[{"name":"CUDA","version":"12.1"},{"name":"Python","version":"v3.11"}]
[{"name":"Python","version":"v3.11"}]
openshift.io/imported-from: quay.io/rh-aiservices-bu/rhoai-lab-insurance-claim-workbench
from:
kind: DockerImage
name: >-
-        quay.io/rh-aiservices-bu/rhoai-lab-insurance-claim-workbench:2.0.1
+        quay.io/rh-aiservices-bu/rhoai-lab-insurance-claim-workbench:2.1.0
importPolicy:
importMode: Legacy
referencePolicy:
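Once Argo CD syncs the ImageStream, the imported tag can be confirmed from the CLI; a minimal check, assuming the usual oc access to the cluster:

  # should print the tag list, now including 2.1.0
  oc get imagestream ic-workbench -n redhat-ods-applications -o jsonpath='{.spec.tags[*].name}'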
107 changes: 107 additions & 0 deletions bootstrap/ic-shared-llm/deployment-hftgi.yaml
@@ -0,0 +1,107 @@
kind: Deployment
apiVersion: apps/v1
metadata:
name: llm-flant5
namespace: ic-shared-llm
labels:
app: llm-flant5
spec:
replicas: 1
selector:
matchLabels:
app: llm-flant5
template:
metadata:
creationTimestamp: null
labels:
app: llm-flant5
spec:
restartPolicy: Always
schedulerName: default-scheduler
affinity: {}
terminationGracePeriodSeconds: 120
securityContext: {}
containers:
- resources:
limits:
cpu: '4'
memory: 8Gi
requests:
cpu: '2'
memory: 6Gi
readinessProbe:
httpGet:
path: /health
port: http
scheme: HTTP
timeoutSeconds: 5
periodSeconds: 30
successThreshold: 1
failureThreshold: 3
terminationMessagePath: /dev/termination-log
name: server
livenessProbe:
httpGet:
path: /health
port: http
scheme: HTTP
timeoutSeconds: 8
periodSeconds: 100
successThreshold: 1
failureThreshold: 3
env:
- name: MODEL_ID
value: google/flan-t5-large
- name: MAX_INPUT_LENGTH
value: '416'
- name: MAX_TOTAL_TOKENS
value: '512'
- name: HUGGINGFACE_HUB_CACHE
value: /models-cache
- name: PORT
value: '3000'
- name: HOSTNAME
value: '0.0.0.0'
securityContext:
capabilities:
drop:
- ALL
runAsNonRoot: true
allowPrivilegeEscalation: false
seccompProfile:
type: RuntimeDefault
ports:
- name: http
containerPort: 3000
protocol: TCP
imagePullPolicy: IfNotPresent
startupProbe:
httpGet:
path: /health
port: http
scheme: HTTP
timeoutSeconds: 1
periodSeconds: 30
successThreshold: 1
failureThreshold: 24
initialDelaySeconds: 60
volumeMounts:
- name: models-cache
mountPath: /models-cache
- name: shm
mountPath: /dev/shm
terminationMessagePolicy: File
image: 'ghcr.io/huggingface/text-generation-inference:1.3'
volumes:
- name: models-cache
persistentVolumeClaim:
claimName: models-cache-hftgi
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
dnsPolicy: ClusterFirst
strategy:
type: Recreate
revisionHistoryLimit: 10
progressDeadlineSeconds: 600
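With MAX_INPUT_LENGTH=416 and MAX_TOTAL_TOKENS=512, each request can generate at most 512 - 416 = 96 new tokens, and the startup probe leaves 60s plus up to 24 × 30s, roughly 13 minutes, for the first model download before the pod is restarted. Text Generation Inference serves a /generate endpoint, so a smoke test from another pod in the cluster could look like this (the prompt is illustrative):

  curl -s http://llm-flant5.ic-shared-llm.svc.cluster.local:3000/generate \
    -H 'Content-Type: application/json' \
    -d '{"inputs": "Summarize: the claim was filed on March 3rd.", "parameters": {"max_new_tokens": 96}}'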
81 changes: 0 additions & 81 deletions bootstrap/ic-shared-llm/deployment-ollama.yaml

This file was deleted.

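With the Ollama Deployment deleted, a quick grep is a cheap way to confirm nothing left in the bootstrap tree still references it; a simple sanity check:

  grep -ri ollama bootstrap/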
6 changes: 3 additions & 3 deletions bootstrap/ic-shared-llm/kustomization.yaml
@@ -11,9 +11,9 @@ resources:
- fix-odf-config.yaml
# wave 1
- pvc.yaml
-- pvc-ollama.yaml
+- pvc-hftgi.yaml
- deployment.yaml
- service.yaml
-- deployment-ollama.yaml
-- service-ollama.yaml
+- deployment-hftgi.yaml
+- service-hftgi.yaml
# wave 2
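The kustomization is what ties the swap together: the Ollama PVC, Deployment, and Service drop out and the HF TGI equivalents come in under the same sync wave. Argo CD applies this automatically; the equivalent manual command, if applying the overlay by hand, would be:

  oc apply -k bootstrap/ic-shared-llm/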
@@ -2,10 +2,10 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
-  name: ollama-cache
+  name: models-cache-hftgi
namespace: ic-shared-llm
labels:
-    app: ollama
+    app: ic-shared-llm
annotations:
argocd.argoproj.io/sync-wave: "0"
spec:
@@ -1,21 +1,21 @@
kind: Service
apiVersion: v1
metadata:
-  name: ollama
+  name: llm-flant5
namespace: ic-shared-llm
labels:
-    app: ollama
+    app: llm-flant5
spec:
clusterIP: None
ipFamilies:
- IPv4
ports:
- name: http
protocol: TCP
-      port: 11434
+      port: 3000
targetPort: http
type: ClusterIP
ipFamilyPolicy: SingleStack
sessionAffinity: None
selector:
-    app: ollama
+    app: llm-flant5
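Since the Service is headless (clusterIP: None), in-cluster clients reach the pod through the service DNS name llm-flant5.ic-shared-llm.svc.cluster.local on port 3000. For a quick check from outside the cluster, a port-forward against the Deployment sidesteps DNS; a minimal sketch:

  oc port-forward -n ic-shared-llm deploy/llm-flant5 3000:3000 &
  curl -s http://localhost:3000/health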
@@ -33,7 +33,7 @@ spec:
# Define some variables
WORKBENCH_NAME="my-workbench"
WORKBENCH_IMAGE="ic-workbench:2.0.1"
WORKBENCH_IMAGE="ic-workbench:2.1.0"
PIPELINE_ENGINE="Tekton"
for i in $(seq 1 $user_count);
@@ -10,7 +10,7 @@ DASHBOARD_ROUTE=https://$(oc get route rhods-dashboard -n redhat-ods-application

# Define some variables
WORKBENCH_NAME="my-workbench"
WORKBENCH_IMAGE="ic-workbench:2.0.1"
WORKBENCH_IMAGE="ic-workbench:2.1.0"
PIPELINE_ENGINE="Tekton"

for i in $(seq 1 $user_count);
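Both scripts stamp out one workbench per lab user from these variables. The loop body is not shown in the hunk, but the pattern presumably expands to something like the following (the user naming is an assumption, not taken from the script):

  for i in $(seq 1 $user_count); do
    echo "creating ${WORKBENCH_NAME} for user${i} from image ${WORKBENCH_IMAGE}"
  done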
79 changes: 0 additions & 79 deletions bootstrap/workbench-image/Containerfile
@@ -25,85 +25,6 @@ RUN echo "tsflags=nodocs" | tee -a /etc/yum.conf && \
# End of OS Packages #
#############################################

###################################################################################################
# CUDA 12.1 Layer, from https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/12.1.1 #
###################################################################################################

USER 0

ENV NVARCH x86_64
ENV NVIDIA_REQUIRE_CUDA "cuda>=12.1 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 brand=tesla,driver>=525,driver<526 brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526"
ENV NV_CUDA_CUDART_VERSION 12.1.105-1

COPY cuda.repo-x86_64 /etc/yum.repos.d/cuda.repo

RUN NVIDIA_GPGKEY_SUM=d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87 && \
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel9/${NVARCH}/D42D0685.pub | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict -

ENV CUDA_VERSION 12.1.1

# For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a
RUN yum upgrade -y && yum install -y \
cuda-cudart-12-1-${NV_CUDA_CUDART_VERSION} \
cuda-compat-12-1 \
&& ln -s cuda-12.1 /usr/local/cuda \
&& yum -y clean all --enablerepo='*' && \
rm -rf /var/cache/dnf && \
find /var/log -type f -name "*.log" -exec rm -f {} \;

# nvidia-docker 1.0
RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf

ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64

COPY NGC-DL-CONTAINER-LICENSE /

# nvidia-container-runtime
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility

ENV NV_CUDA_LIB_VERSION 12.1.1-1

ENV NV_NVTX_VERSION 12.1.105-1
ENV NV_LIBNPP_VERSION 12.1.0.40-1
ENV NV_LIBNPP_PACKAGE libnpp-12-1-${NV_LIBNPP_VERSION}
ENV NV_LIBCUBLAS_VERSION 12.1.3.1-1
ENV NV_LIBNCCL_PACKAGE_NAME libnccl
ENV NV_LIBNCCL_PACKAGE_VERSION 2.17.1-1
ENV NV_LIBNCCL_VERSION 2.17.1
ENV NCCL_VERSION 2.17.1
ENV NV_LIBNCCL_PACKAGE ${NV_LIBNCCL_PACKAGE_NAME}-${NV_LIBNCCL_PACKAGE_VERSION}+cuda12.1

RUN yum install -y \
cuda-libraries-12-1-${NV_CUDA_LIB_VERSION} \
cuda-nvtx-12-1-${NV_NVTX_VERSION} \
${NV_LIBNPP_PACKAGE} \
libcublas-12-1-${NV_LIBCUBLAS_VERSION} \
${NV_LIBNCCL_PACKAGE} \
&& yum -y clean all --enablerepo='*' && \
rm -rf /var/cache/dnf && \
find /var/log -type f -name "*.log" -exec rm -f {} \;

# Set this flag so that libraries can find the location of CUDA
ENV XLA_FLAGS=--xla_gpu_cuda_data_dir=/usr/local/cuda

# CuDNN
ENV NV_CUDNN_VERSION 8.9.0.131-1
ENV NV_CUDNN_PACKAGE libcudnn8-${NV_CUDNN_VERSION}.cuda12.1

RUN yum install -y \
${NV_CUDNN_PACKAGE} \
&& yum -y clean all --enablerepo='*' && \
rm -rf /var/cache/dnf && \
find /var/log -type f -name "*.log" -exec rm -f {} \;

#############################################
# End of CUDA 12.1 Layer #
#############################################

######################################
# Deploy Python packages and Jupyter #
######################################
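With the CUDA layer removed, the "switch to CPU torch" from the commit message happens at the pip level, since the default PyPI wheel pulls in CUDA libraries. The usual way to pin a CPU-only build, shown here for the 2.2.2 version listed in the ImageStream (the actual requirements line for this image is not part of the hunk):

  pip install torch==2.2.2 --index-url https://download.pytorch.org/whl/cpu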
