Skip to content

Commit dc3cb96

Browse files
q10 authored and facebook-github-bot committed
Clone of D68511145 (#2697)
Summary:
- [OSS] Set LD_LIBRARY_PATH for fbgemm in validate_binaries.sh

Differential Revision: D68516472
1 parent dd5457c commit dc3cb96

File tree

2 files changed

+50
-24
lines changed

2 files changed

+50
-24
lines changed

.github/scripts/validate_binaries.sh

Lines changed: 44 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,9 @@
77

88

99
export PYTORCH_CUDA_PKG=""
10+
export CONDA_ENV="build_binary"
1011

11-
conda create -y -n build_binary python="${MATRIX_PYTHON_VERSION}"
12+
conda create -y -n "${CONDA_ENV}" python="${MATRIX_PYTHON_VERSION}"
1213

1314
conda run -n build_binary python --version
1415

@@ -49,41 +50,60 @@ elif [[ ${MATRIX_CHANNEL} = 'release' ]]; then
4950
export PYTORCH_URL="https://download.pytorch.org/whl/${CUDA_VERSION}"
5051
fi
5152

53+
54+
echo "CU_VERSION: ${CUDA_VERSION}"
55+
echo "MATRIX_CHANNEL: ${MATRIX_CHANNEL}"
56+
echo "CONDA_ENV: ${CONDA_ENV}"
57+
58+
# shellcheck disable=SC2155
59+
export CONDA_PREFIX=$(conda run -n "${CONDA_ENV}" printenv CONDA_PREFIX)
60+
61+
find / -name *cuda*
62+
63+
if [[ $CUDA_VERSION = cu* ]]; then
64+
# Setting LD_LIBRARY_PATH fixes the runtime error with fbgemm_gpu not
65+
# being able to locate libnvrtc.so
66+
echo "[NOVA] Setting LD_LIBRARY_PATH ..."
67+
conda env config vars set -n ${CONDA_ENV} \
68+
LD_LIBRARY_PATH="/usr/local/lib:${CUDA_HOME}/lib64:${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH}"
69+
fi
70+
71+
5272
# install pytorch
5373
# switch back to conda once torch nightly is fixed
5474
# if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
5575
# export PYTORCH_CUDA_PKG="pytorch-cuda=${MATRIX_GPU_ARCH_VERSION}"
5676
# fi
57-
conda run -n build_binary pip install torch --index-url "$PYTORCH_URL"
77+
conda run -n "${CONDA_ENV}" pip install torch --index-url "$PYTORCH_URL"
5878

5979
# install fbgemm
60-
conda run -n build_binary pip install fbgemm-gpu --index-url "$PYTORCH_URL"
80+
conda run -n "${CONDA_ENV}" pip install fbgemm-gpu --index-url "$PYTORCH_URL"
6181

6282
# install requirements from pypi
63-
conda run -n build_binary pip install torchmetrics==1.0.3
83+
conda run -n "${CONDA_ENV}" pip install torchmetrics==1.0.3
6484

6585
# install torchrec
66-
conda run -n build_binary pip install torchrec --index-url "$PYTORCH_URL"
86+
conda run -n "${CONDA_ENV}" pip install torchrec --index-url "$PYTORCH_URL"
6787

6888
# Run small import test
69-
conda run -n build_binary python -c "import torch; import fbgemm_gpu; import torchrec"
89+
conda run -n "${CONDA_ENV}" python -c "import torch; import fbgemm_gpu; import torchrec"
7090

7191
# check directory
7292
ls -R
7393

7494
# check if cuda available
75-
conda run -n build_binary python -c "import torch; print(torch.cuda.is_available())"
95+
conda run -n "${CONDA_ENV}" python -c "import torch; print(torch.cuda.is_available())"
7696

7797
# check cuda version
78-
conda run -n build_binary python -c "import torch; print(torch.version.cuda)"
98+
conda run -n "${CONDA_ENV}" python -c "import torch; print(torch.version.cuda)"
7999

80100
# Finally run smoke test
81101
# python 3.11 needs torchx-nightly
82-
conda run -n build_binary pip install torchx-nightly iopath
102+
conda run -n "${CONDA_ENV}" pip install torchx-nightly iopath
83103
if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
84-
conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
104+
conda run -n "${CONDA_ENV}" torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
85105
else
86-
conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --script test_installation.py -- --cpu_only
106+
conda run -n "${CONDA_ENV}" torchx run -s local_cwd dist.ddp -j 1 --script test_installation.py -- --cpu_only
87107
fi
88108

89109

@@ -93,31 +113,31 @@ if [[ ${MATRIX_CHANNEL} != 'release' ]]; then
93113
exit 0
94114
else
95115
# Check version matches only for release binaries
96-
torchrec_version=$(conda run -n build_binary pip show torchrec | grep Version | cut -d' ' -f2)
97-
fbgemm_version=$(conda run -n build_binary pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
116+
torchrec_version=$(conda run -n "${CONDA_ENV}" pip show torchrec | grep Version | cut -d' ' -f2)
117+
fbgemm_version=$(conda run -n "${CONDA_ENV}" pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
98118

99119
if [ "$torchrec_version" != "$fbgemm_version" ]; then
100120
echo "Error: TorchRec package version does not match FBGEMM package version"
101121
exit 1
102122
fi
103123
fi
104124

105-
conda create -y -n build_binary python="${MATRIX_PYTHON_VERSION}"
125+
conda create -y -n "${CONDA_ENV}" python="${MATRIX_PYTHON_VERSION}"
106126

107-
conda run -n build_binary python --version
127+
conda run -n "${CONDA_ENV}" python --version
108128

109129
if [[ ${MATRIX_GPU_ARCH_VERSION} != '12.4' ]]; then
110130
exit 0
111131
fi
112132

113133
echo "checking pypi release"
114-
conda run -n build_binary pip install torch
115-
conda run -n build_binary pip install fbgemm-gpu
116-
conda run -n build_binary pip install torchrec
134+
conda run -n "${CONDA_ENV}" pip install torch
135+
conda run -n "${CONDA_ENV}" pip install fbgemm-gpu
136+
conda run -n "${CONDA_ENV}" pip install torchrec
117137

118138
# Check version matching again for PyPI
119-
torchrec_version=$(conda run -n build_binary pip show torchrec | grep Version | cut -d' ' -f2)
120-
fbgemm_version=$(conda run -n build_binary pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
139+
torchrec_version=$(conda run -n "${CONDA_ENV}" pip show torchrec | grep Version | cut -d' ' -f2)
140+
fbgemm_version=$(conda run -n "${CONDA_ENV}" pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
121141

122142
if [ "$torchrec_version" != "$fbgemm_version" ]; then
123143
echo "Error: TorchRec package version does not match FBGEMM package version"
@@ -128,13 +148,13 @@ fi
128148
ls -R
129149

130150
# check if cuda available
131-
conda run -n build_binary python -c "import torch; print(torch.cuda.is_available())"
151+
conda run -n "${CONDA_ENV}" python -c "import torch; print(torch.cuda.is_available())"
132152

133153
# check cuda version
134-
conda run -n build_binary python -c "import torch; print(torch.version.cuda)"
154+
conda run -n "${CONDA_ENV}" python -c "import torch; print(torch.version.cuda)"
135155

136156
# python 3.11 needs torchx-nightly
137-
conda run -n build_binary pip install torchx-nightly iopath
157+
conda run -n "${CONDA_ENV}" pip install torchx-nightly iopath
138158

139159
# Finally run smoke test
140-
conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
160+
conda run -n "${CONDA_ENV}" torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py

.github/workflows/validate-binaries.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
name: Validate binaries
22

33
on:
4+
pull_request:
5+
paths-ignore:
6+
- "docs/*"
7+
- "third_party/*"
8+
- .gitignore
9+
- "*.md"
410
workflow_call:
511
inputs:
612
channel:

0 commit comments

Comments
 (0)