77
88
99export PYTORCH_CUDA_PKG=" "
10+ export CONDA_ENV=" build_binary"
1011
11- conda create -y -n build_binary python=" ${MATRIX_PYTHON_VERSION} "
12+ conda create -y -n " ${CONDA_ENV} " python=" ${MATRIX_PYTHON_VERSION} "
1213
1314conda run -n build_binary python --version
1415
@@ -49,41 +50,60 @@ elif [[ ${MATRIX_CHANNEL} = 'release' ]]; then
4950 export PYTORCH_URL=" https://download.pytorch.org/whl/${CUDA_VERSION} "
5051fi
5152
53+
54+ echo " CU_VERSION: ${CUDA_VERSION} "
55+ echo " MATRIX_CHANNEL: ${MATRIX_CHANNEL} "
56+ echo " CONDA_ENV: ${CONDA_ENV} "
57+
58+ # shellcheck disable=SC2155
59+ export CONDA_PREFIX=$( conda run -n " ${CONDA_ENV} " printenv CONDA_PREFIX)
60+
61+ find / -name * cuda*
62+
63+ if [[ $CUDA_VERSION = cu* ]]; then
64+ # Setting LD_LIBRARY_PATH fixes the runtime error with fbgemm_gpu not
65+ # being able to locate libnvrtc.so
66+ echo " [NOVA] Setting LD_LIBRARY_PATH ..."
67+ conda env config vars set -n ${CONDA_ENV} \
68+ LD_LIBRARY_PATH=" /usr/local/lib:${CUDA_HOME} /lib64:${CONDA_PREFIX} /lib:${LD_LIBRARY_PATH} "
69+ fi
70+
71+
5272# install pytorch
5373# switch back to conda once torch nightly is fixed
5474# if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
5575# export PYTORCH_CUDA_PKG="pytorch-cuda=${MATRIX_GPU_ARCH_VERSION}"
5676# fi
57- conda run -n build_binary pip install torch --index-url " $PYTORCH_URL "
77+ conda run -n " ${CONDA_ENV} " pip install torch --index-url " $PYTORCH_URL "
5878
5979# install fbgemm
60- conda run -n build_binary pip install fbgemm-gpu --index-url " $PYTORCH_URL "
80+ conda run -n " ${CONDA_ENV} " pip install fbgemm-gpu --index-url " $PYTORCH_URL "
6181
6282# install requirements from pypi
63- conda run -n build_binary pip install torchmetrics==1.0.3
83+ conda run -n " ${CONDA_ENV} " pip install torchmetrics==1.0.3
6484
6585# install torchrec
66- conda run -n build_binary pip install torchrec --index-url " $PYTORCH_URL "
86+ conda run -n " ${CONDA_ENV} " pip install torchrec --index-url " $PYTORCH_URL "
6787
6888# Run small import test
69- conda run -n build_binary python -c " import torch; import fbgemm_gpu; import torchrec"
89+ conda run -n " ${CONDA_ENV} " python -c " import torch; import fbgemm_gpu; import torchrec"
7090
7191# check directory
7292ls -R
7393
7494# check if cuda available
75- conda run -n build_binary python -c " import torch; print(torch.cuda.is_available())"
95+ conda run -n " ${CONDA_ENV} " python -c " import torch; print(torch.cuda.is_available())"
7696
7797# check cuda version
78- conda run -n build_binary python -c " import torch; print(torch.version.cuda)"
98+ conda run -n " ${CONDA_ENV} " python -c " import torch; print(torch.version.cuda)"
7999
80100# Finally run smoke test
81101# python 3.11 needs torchx-nightly
82- conda run -n build_binary pip install torchx-nightly iopath
102+ conda run -n " ${CONDA_ENV} " pip install torchx-nightly iopath
83103if [[ ${MATRIX_GPU_ARCH_TYPE} = ' cuda' ]]; then
84- conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
104+ conda run -n " ${CONDA_ENV} " torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
85105else
86- conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --script test_installation.py -- --cpu_only
106+ conda run -n " ${CONDA_ENV} " torchx run -s local_cwd dist.ddp -j 1 --script test_installation.py -- --cpu_only
87107fi
88108
89109
@@ -93,31 +113,31 @@ if [[ ${MATRIX_CHANNEL} != 'release' ]]; then
93113 exit 0
94114else
95115 # Check version matches only for release binaries
96- torchrec_version=$( conda run -n build_binary pip show torchrec | grep Version | cut -d' ' -f2)
97- fbgemm_version=$( conda run -n build_binary pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
116+ torchrec_version=$( conda run -n " ${CONDA_ENV} " pip show torchrec | grep Version | cut -d' ' -f2)
117+ fbgemm_version=$( conda run -n " ${CONDA_ENV} " pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
98118
99119 if [ " $torchrec_version " != " $fbgemm_version " ]; then
100120 echo " Error: TorchRec package version does not match FBGEMM package version"
101121 exit 1
102122 fi
103123fi
104124
105- conda create -y -n build_binary python=" ${MATRIX_PYTHON_VERSION} "
125+ conda create -y -n " ${CONDA_ENV} " python=" ${MATRIX_PYTHON_VERSION} "
106126
107- conda run -n build_binary python --version
127+ conda run -n " ${CONDA_ENV} " python --version
108128
109129if [[ ${MATRIX_GPU_ARCH_VERSION} != ' 12.4' ]]; then
110130 exit 0
111131fi
112132
113133echo " checking pypi release"
114- conda run -n build_binary pip install torch
115- conda run -n build_binary pip install fbgemm-gpu
116- conda run -n build_binary pip install torchrec
134+ conda run -n " ${CONDA_ENV} " pip install torch
135+ conda run -n " ${CONDA_ENV} " pip install fbgemm-gpu
136+ conda run -n " ${CONDA_ENV} " pip install torchrec
117137
118138# Check version matching again for PyPI
119- torchrec_version=$( conda run -n build_binary pip show torchrec | grep Version | cut -d' ' -f2)
120- fbgemm_version=$( conda run -n build_binary pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
139+ torchrec_version=$( conda run -n " ${CONDA_ENV} " pip show torchrec | grep Version | cut -d' ' -f2)
140+ fbgemm_version=$( conda run -n " ${CONDA_ENV} " pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
121141
122142if [ " $torchrec_version " != " $fbgemm_version " ]; then
123143 echo " Error: TorchRec package version does not match FBGEMM package version"
128148ls -R
129149
130150# check if cuda available
131- conda run -n build_binary python -c " import torch; print(torch.cuda.is_available())"
151+ conda run -n " ${CONDA_ENV} " python -c " import torch; print(torch.cuda.is_available())"
132152
133153# check cuda version
134- conda run -n build_binary python -c " import torch; print(torch.version.cuda)"
154+ conda run -n " ${CONDA_ENV} " python -c " import torch; print(torch.version.cuda)"
135155
136156# python 3.11 needs torchx-nightly
137- conda run -n build_binary pip install torchx-nightly iopath
157+ conda run -n " ${CONDA_ENV} " pip install torchx-nightly iopath
138158
139159# Finally run smoke test
140- conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
160+ conda run -n " ${CONDA_ENV} " torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
0 commit comments