77
88
99export PYTORCH_CUDA_PKG=" "
10+ export CONDA_ENV=" build_binary"
1011
11- conda create -y -n build_binary python=" ${MATRIX_PYTHON_VERSION} "
12+ conda create -y -n " ${CONDA_ENV} " python=" ${MATRIX_PYTHON_VERSION} "
1213
1314conda run -n build_binary python --version
1415
@@ -49,41 +50,58 @@ elif [[ ${MATRIX_CHANNEL} = 'release' ]]; then
4950 export PYTORCH_URL=" https://download.pytorch.org/whl/${CUDA_VERSION} "
5051fi
5152
53+
54+ echo " CU_VERSION: ${CUDA_VERSION} "
55+ echo " MATRIX_CHANNEL: ${MATRIX_CHANNEL} "
56+ echo " CONDA_ENV: ${CONDA_ENV} "
57+
58+ # shellcheck disable=SC2155
59+ export CONDA_PREFIX=$( conda run -n " ${CONDA_ENV} " printenv CONDA_PREFIX)
60+
61+ find / -name * cuda*
62+
63+ # Setting LD_LIBRARY_PATH fixes the runtime error with fbgemm_gpu not
64+ # being able to locate libnvrtc.so
65+ echo " [NOVA] Setting LD_LIBRARY_PATH ..."
66+ conda env config vars set -n ${CONDA_ENV} \
67+ LD_LIBRARY_PATH=" /usr/local/lib:/usr/lib64:${CONDA_PREFIX} /lib:${LD_LIBRARY_PATH} "
68+
69+
5270# install pytorch
5371# switch back to conda once torch nightly is fixed
5472# if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
5573# export PYTORCH_CUDA_PKG="pytorch-cuda=${MATRIX_GPU_ARCH_VERSION}"
5674# fi
57- conda run -n build_binary pip install torch --index-url " $PYTORCH_URL "
75+ conda run -n " ${CONDA_ENV} " pip install torch --index-url " $PYTORCH_URL "
5876
5977# install fbgemm
60- conda run -n build_binary pip install fbgemm-gpu --index-url " $PYTORCH_URL "
78+ conda run -n " ${CONDA_ENV} " pip install fbgemm-gpu --index-url " $PYTORCH_URL "
6179
6280# install requirements from pypi
63- conda run -n build_binary pip install torchmetrics==1.0.3
81+ conda run -n " ${CONDA_ENV} " pip install torchmetrics==1.0.3
6482
6583# install torchrec
66- conda run -n build_binary pip install torchrec --index-url " $PYTORCH_URL "
84+ conda run -n " ${CONDA_ENV} " pip install torchrec --index-url " $PYTORCH_URL "
6785
6886# Run small import test
69- conda run -n build_binary python -c " import torch; import fbgemm_gpu; import torchrec"
87+ conda run -n " ${CONDA_ENV} " python -c " import torch; import fbgemm_gpu; import torchrec"
7088
7189# check directory
7290ls -R
7391
7492# check if cuda available
75- conda run -n build_binary python -c " import torch; print(torch.cuda.is_available())"
93+ conda run -n " ${CONDA_ENV} " python -c " import torch; print(torch.cuda.is_available())"
7694
7795# check cuda version
78- conda run -n build_binary python -c " import torch; print(torch.version.cuda)"
96+ conda run -n " ${CONDA_ENV} " python -c " import torch; print(torch.version.cuda)"
7997
8098# Finally run smoke test
8199# python 3.11 needs torchx-nightly
82- conda run -n build_binary pip install torchx-nightly iopath
100+ conda run -n " ${CONDA_ENV} " pip install torchx-nightly iopath
83101if [[ ${MATRIX_GPU_ARCH_TYPE} = ' cuda' ]]; then
84- conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
102+ conda run -n " ${CONDA_ENV} " torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
85103else
86- conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --script test_installation.py -- --cpu_only
104+ conda run -n " ${CONDA_ENV} " torchx run -s local_cwd dist.ddp -j 1 --script test_installation.py -- --cpu_only
87105fi
88106
89107
@@ -93,31 +111,31 @@ if [[ ${MATRIX_CHANNEL} != 'release' ]]; then
93111 exit 0
94112else
95113 # Check version matches only for release binaries
96- torchrec_version=$( conda run -n build_binary pip show torchrec | grep Version | cut -d' ' -f2)
97- fbgemm_version=$( conda run -n build_binary pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
114+ torchrec_version=$( conda run -n " ${CONDA_ENV} " pip show torchrec | grep Version | cut -d' ' -f2)
115+ fbgemm_version=$( conda run -n " ${CONDA_ENV} " pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
98116
99117 if [ " $torchrec_version " != " $fbgemm_version " ]; then
100118 echo " Error: TorchRec package version does not match FBGEMM package version"
101119 exit 1
102120 fi
103121fi
104122
105- conda create -y -n build_binary python=" ${MATRIX_PYTHON_VERSION} "
123+ conda create -y -n " ${CONDA_ENV} " python=" ${MATRIX_PYTHON_VERSION} "
106124
107- conda run -n build_binary python --version
125+ conda run -n " ${CONDA_ENV} " python --version
108126
109127if [[ ${MATRIX_GPU_ARCH_VERSION} != ' 12.4' ]]; then
110128 exit 0
111129fi
112130
113131echo " checking pypi release"
114- conda run -n build_binary pip install torch
115- conda run -n build_binary pip install fbgemm-gpu
116- conda run -n build_binary pip install torchrec
132+ conda run -n " ${CONDA_ENV} " pip install torch
133+ conda run -n " ${CONDA_ENV} " pip install fbgemm-gpu
134+ conda run -n " ${CONDA_ENV} " pip install torchrec
117135
118136# Check version matching again for PyPI
119- torchrec_version=$( conda run -n build_binary pip show torchrec | grep Version | cut -d' ' -f2)
120- fbgemm_version=$( conda run -n build_binary pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
137+ torchrec_version=$( conda run -n " ${CONDA_ENV} " pip show torchrec | grep Version | cut -d' ' -f2)
138+ fbgemm_version=$( conda run -n " ${CONDA_ENV} " pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
121139
122140if [ " $torchrec_version " != " $fbgemm_version " ]; then
123141 echo " Error: TorchRec package version does not match FBGEMM package version"
128146ls -R
129147
130148# check if cuda available
131- conda run -n build_binary python -c " import torch; print(torch.cuda.is_available())"
149+ conda run -n " ${CONDA_ENV} " python -c " import torch; print(torch.cuda.is_available())"
132150
133151# check cuda version
134- conda run -n build_binary python -c " import torch; print(torch.version.cuda)"
152+ conda run -n " ${CONDA_ENV} " python -c " import torch; print(torch.version.cuda)"
135153
136154# python 3.11 needs torchx-nightly
137- conda run -n build_binary pip install torchx-nightly iopath
155+ conda run -n " ${CONDA_ENV} " pip install torchx-nightly iopath
138156
139157# Finally run smoke test
140- conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
158+ conda run -n " ${CONDA_ENV} " torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
0 commit comments