pax_global_header00006660000000000000000000000064145076010420014511gustar00rootroot0000000000000052 comment=140d3ad677aae615767412873b90982cbf97d35d pytorch_scatter-2.1.2/000077500000000000000000000000001450760104200147305ustar00rootroot00000000000000pytorch_scatter-2.1.2/.coveragerc000066400000000000000000000002251450760104200170500ustar00rootroot00000000000000[run] source=torch_scatter omit=torch_scatter/placeholder.py [report] exclude_lines = pragma: no cover torch.jit.script raise except pytorch_scatter-2.1.2/.github/000077500000000000000000000000001450760104200162705ustar00rootroot00000000000000pytorch_scatter-2.1.2/.github/workflows/000077500000000000000000000000001450760104200203255ustar00rootroot00000000000000pytorch_scatter-2.1.2/.github/workflows/building-conda.yml000066400000000000000000000053341450760104200237340ustar00rootroot00000000000000name: Building Conda on: [workflow_dispatch] jobs: conda-build: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: # We have trouble building for Windows - drop for now. os: [ubuntu-20.04, macos-11] # windows-2019 python-version: ['3.8', '3.9', '3.10', '3.11'] torch-version: [2.0.0, 2.1.0] cuda-version: ['cpu', 'cu117', 'cu118', 'cu121'] exclude: - torch-version: 2.0.0 cuda-version: 'cu121' - torch-version: 2.1.0 cuda-version: 'cu117' - os: macos-11 cuda-version: 'cu117' - os: macos-11 cuda-version: 'cu118' - os: macos-11 cuda-version: 'cu121' steps: - uses: actions/checkout@v2 - name: Set up Conda for Python ${{ matrix.python-version }} uses: conda-incubator/setup-miniconda@v2 with: python-version: ${{ matrix.python-version }} - name: Free Disk Space (Ubuntu) if: ${{ runner.os == 'Linux' }} uses: jlumbroso/free-disk-space@main - name: Install Conda packages run: | conda install conda-build conda-verify --yes shell: bash -l {0} - name: Install CUDA ${{ matrix.cuda-version }} if: ${{ matrix.cuda-version != 'cpu' }} run: | bash .github/workflows/cuda/${{ matrix.cuda-version }}-${{ runner.os }}.sh shell: bash - name: Build Conda package for CPU if: ${{ matrix.cuda-version == 'cpu' }} run: | FORCE_CUDA=0 TORCH_CUDA_ARCH_LIST=0 ./conda/pytorch-scatter/build_conda.sh ${{ matrix.python-version }} ${{ matrix.torch-version }} ${{ matrix.cuda-version }} shell: bash -l {0} - name: Build Conda package for GPU if: ${{ matrix.cuda-version != 'cpu' }} run: | source .github/workflows/cuda/${{ matrix.cuda-version }}-${{ runner.os }}-env.sh ./conda/pytorch-scatter/build_conda.sh ${{ matrix.python-version }} ${{ matrix.torch-version }} ${{ matrix.cuda-version }} shell: bash -l {0} - name: Publish Conda package on organization channel run: | conda install anaconda-client --yes anaconda upload --force --label main $HOME/conda-bld/*/*.tar.bz2 env: ANACONDA_API_TOKEN: ${{ secrets.PYG_ANACONDA_TOKEN }} shell: bash -l {0} - name: Publish Conda package on personal channel run: | conda install anaconda-client --yes anaconda upload --force --label main $HOME/conda-bld/*/*.tar.bz2 env: ANACONDA_API_TOKEN: ${{ secrets.RUSTY1S_ANACONDA_TOKEN }} shell: bash -l {0} pytorch_scatter-2.1.2/.github/workflows/building.yml000066400000000000000000000073471450760104200226600ustar00rootroot00000000000000name: Building Wheels on: [workflow_dispatch] jobs: wheel: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-20.04, macos-11, windows-2019] python-version: ['3.8', '3.9', '3.10', '3.11'] torch-version: [2.0.0, 2.1.0] cuda-version: ['cpu', 'cu117', 'cu118', 'cu121'] exclude: - torch-version: 2.0.0 cuda-version: 'cu121' - torch-version: 2.1.0 cuda-version: 'cu117' - os: macos-11 cuda-version: 'cu117' - os: macos-11 cuda-version: 'cu118' - os: macos-11 cuda-version: 'cu121' steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Upgrade pip run: | pip install --upgrade setuptools - name: Free Disk Space (Ubuntu) if: ${{ runner.os == 'Linux' }} uses: jlumbroso/free-disk-space@main - name: Install CUDA ${{ matrix.cuda-version }} if: ${{ matrix.cuda-version != 'cpu' }} run: | bash .github/workflows/cuda/${{ matrix.cuda-version }}-${{ runner.os }}.sh - name: Install PyTorch ${{ matrix.torch-version }}+${{ matrix.cuda-version }} run: | pip install torch==${{ matrix.torch-version }} --extra-index-url https://download.pytorch.org/whl/${{ matrix.cuda-version }} python -c "import torch; print('PyTorch:', torch.__version__)" python -c "import torch; print('CUDA:', torch.version.cuda)" - name: Patch PyTorch static constexpr on Windows if: ${{ runner.os == 'Windows' }} run: | Torch_DIR=`python -c 'import os; import torch; print(os.path.dirname(torch.__file__))'` sed -i '31,38c\ TORCH_API void lazy_init_num_threads();' ${Torch_DIR}/include/ATen/Parallel.h shell: bash - name: Set version if: ${{ runner.os != 'macOS' }} run: | VERSION=`sed -n "s/^__version__ = '\(.*\)'/\1/p" torch_scatter/__init__.py` TORCH_VERSION=`echo "pt${{ matrix.torch-version }}" | sed "s/..$//" | sed "s/\.//g"` CUDA_VERSION=`echo ${{ matrix.cuda-version }}` echo "New version name: $VERSION+$TORCH_VERSION$CUDA_VERSION" sed -i "s/$VERSION/$VERSION+$TORCH_VERSION$CUDA_VERSION/" setup.py sed -i "s/$VERSION/$VERSION+$TORCH_VERSION$CUDA_VERSION/" torch_scatter/__init__.py shell: bash - name: Install main package for CPU if: ${{ matrix.cuda-version == 'cpu' }} run: | FORCE_ONLY_CPU=1 python setup.py develop shell: bash - name: Install main package for GPU if: ${{ matrix.cuda-version != 'cpu' }} run: | source .github/workflows/cuda/${{ matrix.cuda-version }}-${{ runner.os }}-env.sh python setup.py develop shell: bash - name: Test installation run: | python -c "import torch_scatter; print('torch-scatter:', torch_scatter.__version__)" - name: Build wheel run: | pip install wheel python setup.py bdist_wheel --dist-dir=dist - name: Configure AWS uses: aws-actions/configure-aws-credentials@v1 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-region: us-west-1 - name: Upload wheel run: | aws s3 sync dist s3://data.pyg.org/whl/torch-${{ matrix.torch-version }}+${{ matrix.cuda-version }} --grants read=uri=http://acs.amazonaws.com/groups/global/AllUsers pytorch_scatter-2.1.2/.github/workflows/cuda/000077500000000000000000000000001450760104200212415ustar00rootroot00000000000000pytorch_scatter-2.1.2/.github/workflows/cuda/cu101-Linux-env.sh000066400000000000000000000003131450760104200243060ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/usr/local/cuda-10.1 LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} PATH=${CUDA_HOME}/bin:${PATH} export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5" pytorch_scatter-2.1.2/.github/workflows/cuda/cu101-Linux.sh000077500000000000000000000013361450760104200235310ustar00rootroot00000000000000#!/bin/bash OS=ubuntu1804 wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda-repo-${OS}-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb sudo dpkg -i cuda-repo-${OS}-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb sudo apt-key add /var/cuda-repo-10-1-local-10.1.243-418.87.00/7fa2af80.pub sudo apt-get -qq update sudo apt install cuda-nvcc-10-1 cuda-libraries-dev-10-1 sudo apt clean rm -f https://developer.download.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda-repo-${OS}-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb pytorch_scatter-2.1.2/.github/workflows/cuda/cu101-Windows-env.sh000066400000000000000000000004341450760104200246450ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v10.1 PATH=${CUDA_HOME}/bin:$PATH PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5" pytorch_scatter-2.1.2/.github/workflows/cuda/cu101-Windows.sh000077500000000000000000000020761450760104200240660ustar00rootroot00000000000000#!/bin/bash # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=10.1 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}/Prod/local_installers/ export CUDA_FILE=cuda_${CUDA_SHORT}.243_426.00_win10.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" rm -f "${CUDA_FILE}" pytorch_scatter-2.1.2/.github/workflows/cuda/cu102-Linux-env.sh000066400000000000000000000003131450760104200243070ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/usr/local/cuda-10.2 LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} PATH=${CUDA_HOME}/bin:${PATH} export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5" pytorch_scatter-2.1.2/.github/workflows/cuda/cu102-Linux.sh000077500000000000000000000013321450760104200235260ustar00rootroot00000000000000#!/bin/bash OS=ubuntu1804 wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb sudo dpkg -i cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb sudo apt-key add /var/cuda-repo-10-2-local-10.2.89-440.33.01/7fa2af80.pub sudo apt-get -qq update sudo apt install cuda-nvcc-10-2 cuda-libraries-dev-10-2 sudo apt clean rm -f https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb pytorch_scatter-2.1.2/.github/workflows/cuda/cu102-Windows-env.sh000066400000000000000000000004341450760104200246460ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v10.2 PATH=${CUDA_HOME}/bin:$PATH PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5" pytorch_scatter-2.1.2/.github/workflows/cuda/cu102-Windows.sh000077500000000000000000000020741450760104200240650ustar00rootroot00000000000000#!/bin/bash # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=10.2 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}/Prod/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.89_441.22_win10.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" rm -f "${CUDA_FILE}" pytorch_scatter-2.1.2/.github/workflows/cuda/cu111-Linux-env.sh000066400000000000000000000003231450760104200243100ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/usr/local/cuda-11.1 LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} PATH=${CUDA_HOME}/bin:${PATH} export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" pytorch_scatter-2.1.2/.github/workflows/cuda/cu111-Linux.sh000077500000000000000000000012711450760104200235300ustar00rootroot00000000000000#!/bin/bash OS=ubuntu1804 wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/11.1.1/local_installers/cuda-repo-${OS}-11-1-local_11.1.1-455.32.00-1_amd64.deb sudo dpkg -i cuda-repo-${OS}-11-1-local_11.1.1-455.32.00-1_amd64.deb sudo apt-key add /var/cuda-repo-${OS}-11-1-local/7fa2af80.pub sudo apt-get -qq update sudo apt install cuda-nvcc-11-1 cuda-libraries-dev-11-1 sudo apt clean rm -f https://developer.download.nvidia.com/compute/cuda/11.1.1/local_installers/cuda-repo-${OS}-11-1-local_11.1.1-455.32.00-1_amd64.deb pytorch_scatter-2.1.2/.github/workflows/cuda/cu111-Windows-env.sh000066400000000000000000000004141450760104200246440ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.1 PATH=${CUDA_HOME}/bin:$PATH PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="6.0+PTX" pytorch_scatter-2.1.2/.github/workflows/cuda/cu111-Windows.sh000077500000000000000000000020701450760104200240610ustar00rootroot00000000000000#!/bin/bash # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=11.1 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.1/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.1_456.81_win10.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" rm -f "${CUDA_FILE}" pytorch_scatter-2.1.2/.github/workflows/cuda/cu113-Linux-env.sh000066400000000000000000000003231450760104200243120ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/usr/local/cuda-11.3 LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} PATH=${CUDA_HOME}/bin:${PATH} export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" pytorch_scatter-2.1.2/.github/workflows/cuda/cu113-Linux.sh000077500000000000000000000012711450760104200235320ustar00rootroot00000000000000#!/bin/bash OS=ubuntu1804 wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb sudo dpkg -i cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb sudo apt-key add /var/cuda-repo-${OS}-11-3-local/7fa2af80.pub sudo apt-get -qq update sudo apt install cuda-nvcc-11-3 cuda-libraries-dev-11-3 sudo apt clean rm -f https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb pytorch_scatter-2.1.2/.github/workflows/cuda/cu113-Windows-env.sh000066400000000000000000000004141450760104200246460ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.3 PATH=${CUDA_HOME}/bin:$PATH PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="6.0+PTX" pytorch_scatter-2.1.2/.github/workflows/cuda/cu113-Windows.sh000077500000000000000000000021151450760104200240630ustar00rootroot00000000000000#!/bin/bash # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=11.3 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" rm -f "${CUDA_FILE}" pytorch_scatter-2.1.2/.github/workflows/cuda/cu115-Linux-env.sh000066400000000000000000000003231450760104200243140ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/usr/local/cuda-11.5 LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} PATH=${CUDA_HOME}/bin:${PATH} export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" pytorch_scatter-2.1.2/.github/workflows/cuda/cu115-Linux.sh000077500000000000000000000012711450760104200235340ustar00rootroot00000000000000#!/bin/bash OS=ubuntu1804 wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/11.5.2/local_installers/cuda-repo-${OS}-11-5-local_11.5.2-495.29.05-1_amd64.deb sudo dpkg -i cuda-repo-${OS}-11-5-local_11.5.2-495.29.05-1_amd64.deb sudo apt-key add /var/cuda-repo-${OS}-11-5-local/7fa2af80.pub sudo apt-get -qq update sudo apt install cuda-nvcc-11-5 cuda-libraries-dev-11-5 sudo apt clean rm -f https://developer.download.nvidia.com/compute/cuda/11.5.2/local_installers/cuda-repo-${OS}-11-5-local_11.5.2-495.29.05-1_amd64.deb pytorch_scatter-2.1.2/.github/workflows/cuda/cu115-Windows-env.sh000066400000000000000000000004141450760104200246500ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.3 PATH=${CUDA_HOME}/bin:$PATH PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="6.0+PTX" pytorch_scatter-2.1.2/.github/workflows/cuda/cu115-Windows.sh000077500000000000000000000022221450760104200240640ustar00rootroot00000000000000#!/bin/bash # TODO We currently use CUDA 11.3 to build CUDA 11.5 Windows wheels # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=11.3 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" rm -f "${CUDA_FILE}" pytorch_scatter-2.1.2/.github/workflows/cuda/cu116-Linux-env.sh000066400000000000000000000003231450760104200243150ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/usr/local/cuda-11.6 LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} PATH=${CUDA_HOME}/bin:${PATH} export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" pytorch_scatter-2.1.2/.github/workflows/cuda/cu116-Linux.sh000077500000000000000000000012711450760104200235350ustar00rootroot00000000000000#!/bin/bash OS=ubuntu1804 wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb sudo dpkg -i cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb sudo apt-key add /var/cuda-repo-${OS}-11-6-local/7fa2af80.pub sudo apt-get -qq update sudo apt install cuda-nvcc-11-6 cuda-libraries-dev-11-6 sudo apt clean rm -f https://developer.download.nvidia.com/compute/cuda/11.5.2/local_installers/cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb pytorch_scatter-2.1.2/.github/workflows/cuda/cu116-Windows-env.sh000066400000000000000000000004141450760104200246510ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.3 PATH=${CUDA_HOME}/bin:$PATH PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="6.0+PTX" pytorch_scatter-2.1.2/.github/workflows/cuda/cu116-Windows.sh000077500000000000000000000022221450760104200240650ustar00rootroot00000000000000#!/bin/bash # TODO We currently use CUDA 11.3 to build CUDA 11.6 Windows wheels # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=11.3 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" rm -f "${CUDA_FILE}" pytorch_scatter-2.1.2/.github/workflows/cuda/cu117-Linux-env.sh000066400000000000000000000003231450760104200243160ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/usr/local/cuda-11.7 LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} PATH=${CUDA_HOME}/bin:${PATH} export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" pytorch_scatter-2.1.2/.github/workflows/cuda/cu117-Linux.sh000077500000000000000000000013131450760104200235330ustar00rootroot00000000000000#!/bin/bash OS=ubuntu2004 wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda-repo-${OS}-11-7-local_11.7.1-515.65.01-1_amd64.deb sudo dpkg -i cuda-repo-${OS}-11-7-local_11.7.1-515.65.01-1_amd64.deb sudo cp /var/cuda-repo-${OS}-11-7-local/cuda-*-keyring.gpg /usr/share/keyrings/ sudo apt-get -qq update sudo apt install cuda-nvcc-11-7 cuda-libraries-dev-11-7 sudo apt clean rm -f https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda-repo-${OS}-11-7-local_11.7.1-515.65.01-1_amd64.deb pytorch_scatter-2.1.2/.github/workflows/cuda/cu117-Windows-env.sh000066400000000000000000000004141450760104200246520ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.3 PATH=${CUDA_HOME}/bin:$PATH PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="6.0+PTX" pytorch_scatter-2.1.2/.github/workflows/cuda/cu117-Windows.sh000077500000000000000000000022221450760104200240660ustar00rootroot00000000000000#!/bin/bash # TODO We currently use CUDA 11.3 to build CUDA 11.7 Windows wheels # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=11.3 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" rm -f "${CUDA_FILE}" pytorch_scatter-2.1.2/.github/workflows/cuda/cu118-Linux-env.sh000066400000000000000000000003231450760104200243170ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/usr/local/cuda-11.8 LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} PATH=${CUDA_HOME}/bin:${PATH} export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" pytorch_scatter-2.1.2/.github/workflows/cuda/cu118-Linux.sh000077500000000000000000000013131450760104200235340ustar00rootroot00000000000000#!/bin/bash OS=ubuntu2004 wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda-repo-${OS}-11-8-local_11.8.0-520.61.05-1_amd64.deb sudo dpkg -i cuda-repo-${OS}-11-8-local_11.8.0-520.61.05-1_amd64.deb sudo cp /var/cuda-repo-${OS}-11-8-local/cuda-*-keyring.gpg /usr/share/keyrings/ sudo apt-get -qq update sudo apt install cuda-nvcc-11-8 cuda-libraries-dev-11-8 sudo apt clean rm -f https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda-repo-${OS}-11-8-local_11.8.0-520.61.05-1_amd64.deb pytorch_scatter-2.1.2/.github/workflows/cuda/cu118-Windows-env.sh000066400000000000000000000004141450760104200246530ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.8 PATH=${CUDA_HOME}/bin:$PATH PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="6.0+PTX" pytorch_scatter-2.1.2/.github/workflows/cuda/cu118-Windows.sh000077500000000000000000000021171450760104200240720ustar00rootroot00000000000000#!/bin/bash # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=11.8 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.0_522.06_windows.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" rm -f "${CUDA_FILE}" pytorch_scatter-2.1.2/.github/workflows/cuda/cu121-Linux-env.sh000066400000000000000000000003231450760104200243110ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/usr/local/cuda-12.1 LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} PATH=${CUDA_HOME}/bin:${PATH} export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="5.0+PTX;6.0;7.0;7.5;8.0;8.6;9.0" pytorch_scatter-2.1.2/.github/workflows/cuda/cu121-Linux.sh000077500000000000000000000013131450760104200235260ustar00rootroot00000000000000#!/bin/bash OS=ubuntu2004 wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda-repo-${OS}-12-1-local_12.1.1-530.30.02-1_amd64.deb sudo dpkg -i cuda-repo-${OS}-12-1-local_12.1.1-530.30.02-1_amd64.deb sudo cp /var/cuda-repo-${OS}-12-1-local/cuda-*-keyring.gpg /usr/share/keyrings/ sudo apt-get -qq update sudo apt install cuda-nvcc-12-1 cuda-libraries-dev-12-1 sudo apt clean rm -f https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda-repo-${OS}-12-1-local_12.1.1-530.30.02-1_amd64.deb pytorch_scatter-2.1.2/.github/workflows/cuda/cu121-Windows-env.sh000066400000000000000000000004141450760104200246450ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v12.1 PATH=${CUDA_HOME}/bin:$PATH PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="6.0+PTX" pytorch_scatter-2.1.2/.github/workflows/cuda/cu121-Windows.sh000077500000000000000000000021171450760104200240640ustar00rootroot00000000000000#!/bin/bash # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=12.1 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.1/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.1_531.14_windows.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" rm -f "${CUDA_FILE}" pytorch_scatter-2.1.2/.github/workflows/linting.yml000066400000000000000000000006371450760104200225220ustar00rootroot00000000000000name: Linting on: push: branches: - master pull_request: jobs: flake8: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Set up Python uses: actions/setup-python@v2 with: python-version: 3.8 - name: Install dependencies run: | pip install flake8 - name: Run linting run: | flake8 . pytorch_scatter-2.1.2/.github/workflows/stale.yml000066400000000000000000000014261450760104200221630ustar00rootroot00000000000000name: "Close stale issues and PRs" on: schedule: # Every day at 00:00 - cron: "0 0 * * *" workflow_dispatch: jobs: stale: runs-on: ubuntu-latest steps: - uses: actions/stale@v4.0.0 with: stale-issue-message: 'This issue had no activity for **6 months**. It will be closed in **2 weeks** unless there is some new activity. Is this issue already resolved?' stale-issue-label: 'stale' exempt-issue-labels: 'bug,enhancement,good first issue' stale-pr-message: 'This pull request had no activity for **6 months**. It will be closed in **2 weeks** unless there is some new activity.' stale-pr-label: 'stale' days-before-stale: 180 days-before-close: 14 operations-per-run: 200 pytorch_scatter-2.1.2/.github/workflows/testing.yml000066400000000000000000000025411450760104200225270ustar00rootroot00000000000000name: Testing on: push: branches: - master pull_request: jobs: pytest: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-latest, windows-latest] python-version: [3.8] torch-version: [2.0.0, 2.1.0] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Install PyTorch ${{ matrix.torch-version }} run: | pip install torch==${{ matrix.torch-version }} --extra-index-url https://download.pytorch.org/whl/cpu - name: Patch PyTorch static constexpr on Windows if: ${{ runner.os == 'Windows' }} run: | Torch_DIR=`python -c 'import os; import torch; print(os.path.dirname(torch.__file__))'` sed -i '31,38c\ TORCH_API void lazy_init_num_threads();' ${Torch_DIR}/include/ATen/Parallel.h shell: bash - name: Install main package run: | python setup.py develop - name: Run test-suite run: | pip install pytest pytest-cov pytest --cov --cov-report=xml - name: Upload coverage uses: codecov/codecov-action@v1 if: success() with: fail_ci_if_error: false pytorch_scatter-2.1.2/.gitignore000066400000000000000000000001521450760104200167160ustar00rootroot00000000000000__pycache__/ build/ dist/ .cache/ .eggs/ *.egg-info/ .coverage *.so *.aux *.log *.pdf *.hip *_hip.cpp hip pytorch_scatter-2.1.2/CMakeLists.txt000066400000000000000000000052511450760104200174730ustar00rootroot00000000000000cmake_minimum_required(VERSION 3.0) project(torchscatter) set(CMAKE_CXX_STANDARD 14) set(TORCHSCATTER_VERSION 2.1.2) option(WITH_CUDA "Enable CUDA support" OFF) option(WITH_PYTHON "Link to Python when building" ON) if(WITH_CUDA) enable_language(CUDA) add_definitions(-D__CUDA_NO_HALF_OPERATORS__) add_definitions(-DWITH_CUDA) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr") endif() if (WITH_PYTHON) add_definitions(-DWITH_PYTHON) find_package(Python3 COMPONENTS Development) endif() find_package(Torch REQUIRED) file(GLOB HEADERS csrc/*.h) file(GLOB OPERATOR_SOURCES csrc/cpu/*.h csrc/cpu/*.cpp csrc/*.cpp) if(WITH_CUDA) file(GLOB OPERATOR_SOURCES ${OPERATOR_SOURCES} csrc/cuda/*.h csrc/cuda/*.cu) endif() add_library(${PROJECT_NAME} SHARED ${OPERATOR_SOURCES}) target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES}) if (WITH_PYTHON) target_link_libraries(${PROJECT_NAME} PRIVATE Python3::Python) endif() set_target_properties(${PROJECT_NAME} PROPERTIES EXPORT_NAME TorchScatter) target_include_directories(${PROJECT_NAME} INTERFACE "$" $) include(GNUInstallDirs) include(CMakePackageConfigHelpers) set(TORCHSCATTER_CMAKECONFIG_INSTALL_DIR "share/cmake/TorchScatter" CACHE STRING "install path for TorchScatterConfig.cmake") configure_package_config_file(cmake/TorchScatterConfig.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/TorchScatterConfig.cmake" INSTALL_DESTINATION ${TORCHSCATTER_CMAKECONFIG_INSTALL_DIR}) write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/TorchScatterConfigVersion.cmake VERSION ${TORCHSCATTER_VERSION} COMPATIBILITY AnyNewerVersion) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/TorchScatterConfig.cmake ${CMAKE_CURRENT_BINARY_DIR}/TorchScatterConfigVersion.cmake DESTINATION ${TORCHSCATTER_CMAKECONFIG_INSTALL_DIR}) install(TARGETS ${PROJECT_NAME} EXPORT TorchScatterTargets LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ) install(EXPORT TorchScatterTargets NAMESPACE TorchScatter:: DESTINATION ${TORCHSCATTER_CMAKECONFIG_INSTALL_DIR}) install(FILES ${HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}) install(FILES csrc/cpu/scatter_cpu.h csrc/cpu/segment_coo_cpu.h csrc/cpu/segment_csr_cpu.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/cpu) if(WITH_CUDA) install(FILES csrc/cuda/scatter_cuda.h csrc/cuda/segment_coo_cuda.h csrc/cuda/segment_csr_cuda.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/cuda) endif() if(WITH_CUDA) set_property(TARGET torch_cuda PROPERTY INTERFACE_COMPILE_OPTIONS "") set_property(TARGET torch_cpu PROPERTY INTERFACE_COMPILE_OPTIONS "") endif() pytorch_scatter-2.1.2/LICENSE000066400000000000000000000020761450760104200157420ustar00rootroot00000000000000Copyright (c) 2020 Matthias Fey Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. pytorch_scatter-2.1.2/MANIFEST.in000066400000000000000000000001251450760104200164640ustar00rootroot00000000000000include README.md include LICENSE recursive-exclude test * recursive-include csrc * pytorch_scatter-2.1.2/README.md000066400000000000000000000140371450760104200162140ustar00rootroot00000000000000[pypi-image]: https://badge.fury.io/py/torch-scatter.svg [pypi-url]: https://pypi.python.org/pypi/torch-scatter [testing-image]: https://github.com/rusty1s/pytorch_scatter/actions/workflows/testing.yml/badge.svg [testing-url]: https://github.com/rusty1s/pytorch_scatter/actions/workflows/testing.yml [linting-image]: https://github.com/rusty1s/pytorch_scatter/actions/workflows/linting.yml/badge.svg [linting-url]: https://github.com/rusty1s/pytorch_scatter/actions/workflows/linting.yml [docs-image]: https://readthedocs.org/projects/pytorch-scatter/badge/?version=latest [docs-url]: https://pytorch-scatter.readthedocs.io/en/latest/?badge=latest [coverage-image]: https://codecov.io/gh/rusty1s/pytorch_scatter/branch/master/graph/badge.svg [coverage-url]: https://codecov.io/github/rusty1s/pytorch_scatter?branch=master # PyTorch Scatter [![PyPI Version][pypi-image]][pypi-url] [![Testing Status][testing-image]][testing-url] [![Linting Status][linting-image]][linting-url] [![Docs Status][docs-image]][docs-url] [![Code Coverage][coverage-image]][coverage-url]

-------------------------------------------------------------------------------- **[Documentation](https://pytorch-scatter.readthedocs.io)** This package consists of a small extension library of highly optimized sparse update (scatter and segment) operations for the use in [PyTorch](http://pytorch.org/), which are missing in the main package. Scatter and segment operations can be roughly described as reduce operations based on a given "group-index" tensor. Segment operations require the "group-index" tensor to be sorted, whereas scatter operations are not subject to these requirements. The package consists of the following operations with reduction types `"sum"|"mean"|"min"|"max"`: * [**scatter**](https://pytorch-scatter.readthedocs.io/en/latest/functions/scatter.html) based on arbitrary indices * [**segment_coo**](https://pytorch-scatter.readthedocs.io/en/latest/functions/segment_coo.html) based on sorted indices * [**segment_csr**](https://pytorch-scatter.readthedocs.io/en/latest/functions/segment_csr.html) based on compressed indices via pointers In addition, we provide the following **composite functions** which make use of `scatter_*` operations under the hood: `scatter_std`, `scatter_logsumexp`, `scatter_softmax` and `scatter_log_softmax`. All included operations are broadcastable, work on varying data types, are implemented both for CPU and GPU with corresponding backward implementations, and are fully traceable. ## Installation ### Anaconda **Update:** You can now install `pytorch-scatter` via [Anaconda](https://anaconda.org/pyg/pytorch-scatter) for all major OS/PyTorch/CUDA combinations 🤗 Given that you have [`pytorch >= 1.8.0` installed](https://pytorch.org/get-started/locally/), simply run ``` conda install pytorch-scatter -c pyg ``` ### Binaries We alternatively provide pip wheels for all major OS/PyTorch/CUDA combinations, see [here](https://data.pyg.org/whl). #### PyTorch 2.1 To install the binaries for PyTorch 2.1.0, simply run ``` pip install torch-scatter -f https://data.pyg.org/whl/torch-2.1.0+${CUDA}.html ``` where `${CUDA}` should be replaced by either `cpu`, `cu118`, or `cu121` depending on your PyTorch installation. | | `cpu` | `cu118` | `cu121` | |-------------|-------|---------|---------| | **Linux** | ✅ | ✅ | ✅ | | **Windows** | ✅ | ✅ | ✅ | | **macOS** | ✅ | | | #### PyTorch 2.0 To install the binaries for PyTorch 2.0.0, simply run ``` pip install torch-scatter -f https://data.pyg.org/whl/torch-2.0.0+${CUDA}.html ``` where `${CUDA}` should be replaced by either `cpu`, `cu117`, or `cu118` depending on your PyTorch installation. | | `cpu` | `cu117` | `cu118` | |-------------|-------|---------|---------| | **Linux** | ✅ | ✅ | ✅ | | **Windows** | ✅ | ✅ | ✅ | | **macOS** | ✅ | | | **Note:** Binaries of older versions are also provided for PyTorch 1.4.0, PyTorch 1.5.0, PyTorch 1.6.0, PyTorch 1.7.0/1.7.1, PyTorch 1.8.0/1.8.1, PyTorch 1.9.0, PyTorch 1.10.0/1.10.1/1.10.2, PyTorch 1.11.0, PyTorch 1.12.0/1.12.1 and PyTorch 1.13.0/1.13.1 (following the same procedure). For older versions, you need to explicitly specify the latest supported version number or install via `pip install --no-index` in order to prevent a manual installation from source. You can look up the latest supported version number [here](https://data.pyg.org/whl). ### From source Ensure that at least PyTorch 1.4.0 is installed and verify that `cuda/bin` and `cuda/include` are in your `$PATH` and `$CPATH` respectively, *e.g.*: ``` $ python -c "import torch; print(torch.__version__)" >>> 1.4.0 $ echo $PATH >>> /usr/local/cuda/bin:... $ echo $CPATH >>> /usr/local/cuda/include:... ``` Then run: ``` pip install torch-scatter ``` When running in a docker container without NVIDIA driver, PyTorch needs to evaluate the compute capabilities and may fail. In this case, ensure that the compute capabilities are set via `TORCH_CUDA_ARCH_LIST`, *e.g.*: ``` export TORCH_CUDA_ARCH_LIST = "6.0 6.1 7.2+PTX 7.5+PTX" ``` ## Example ```py import torch from torch_scatter import scatter_max src = torch.tensor([[2, 0, 1, 4, 3], [0, 2, 1, 3, 4]]) index = torch.tensor([[4, 5, 4, 2, 3], [0, 0, 2, 2, 1]]) out, argmax = scatter_max(src, index, dim=-1) ``` ``` print(out) tensor([[0, 0, 4, 3, 2, 0], [2, 4, 3, 0, 0, 0]]) print(argmax) tensor([[5, 5, 3, 4, 0, 1] [1, 4, 3, 5, 5, 5]]) ``` ## Running tests ``` pytest ``` ## C++ API `torch-scatter` also offers a C++ API that contains C++ equivalent of python models. For this, we need to add `TorchLib` to the `-DCMAKE_PREFIX_PATH` (*e.g.*, it may exists in `{CONDA}/lib/python{X.X}/site-packages/torch` if installed via `conda`): ``` mkdir build cd build # Add -DWITH_CUDA=on support for CUDA support cmake -DCMAKE_PREFIX_PATH="..." .. make make install ``` pytorch_scatter-2.1.2/benchmark/000077500000000000000000000000001450760104200166625ustar00rootroot00000000000000pytorch_scatter-2.1.2/benchmark/.gitignore000066400000000000000000000000141450760104200206450ustar00rootroot00000000000000*.mat *.tmp pytorch_scatter-2.1.2/benchmark/gather.py000066400000000000000000000104141450760104200205060ustar00rootroot00000000000000import time import itertools import argparse import torch from scipy.io import loadmat from torch_scatter import gather_coo, gather_csr from scatter_segment import short_rows, long_rows, download, bold @torch.no_grad() def correctness(dataset): group, name = dataset mat = loadmat(f'{name}.mat')['Problem'][0][0][2].tocsr() rowptr = torch.from_numpy(mat.indptr).to(args.device, torch.long) row = torch.from_numpy(mat.tocoo().row).to(args.device, torch.long) dim_size = rowptr.size(0) - 1 for size in sizes[1:]: try: x = torch.randn((dim_size, size), device=args.device) x = x.squeeze(-1) if size == 1 else x out1 = x.index_select(0, row) out2 = gather_coo(x, row) out3 = gather_csr(x, rowptr) assert torch.allclose(out1, out2, atol=1e-4) assert torch.allclose(out1, out3, atol=1e-4) except RuntimeError as e: if 'out of memory' not in str(e): raise RuntimeError(e) torch.cuda.empty_cache() def time_func(func, x): try: if torch.cuda.is_available(): torch.cuda.synchronize() t = time.perf_counter() if not args.with_backward: with torch.no_grad(): for _ in range(iters): func(x) else: x = x.requires_grad_() for _ in range(iters): out = func(x) torch.autograd.grad(out, x, out, only_inputs=True) if torch.cuda.is_available(): torch.cuda.synchronize() return time.perf_counter() - t except RuntimeError as e: if 'out of memory' not in str(e): raise RuntimeError(e) torch.cuda.empty_cache() return float('inf') def timing(dataset): group, name = dataset mat = loadmat(f'{name}.mat')['Problem'][0][0][2].tocsr() rowptr = torch.from_numpy(mat.indptr).to(args.device, torch.long) row = torch.from_numpy(mat.tocoo().row).to(args.device, torch.long) dim_size = rowptr.size(0) - 1 avg_row_len = row.size(0) / dim_size def select(x): return x.index_select(0, row) def gather(x): return x.gather(0, row.view(-1, 1).expand(-1, x.size(1))) def gat_coo(x): return gather_coo(x, row) def gat_csr(x): return gather_csr(x, rowptr) t1, t2, t3, t4 = [], [], [], [] for size in sizes: try: x = torch.randn((dim_size, size), device=args.device) t1 += [time_func(select, x)] t2 += [time_func(gather, x)] t3 += [time_func(gat_coo, x)] t4 += [time_func(gat_csr, x)] del x except RuntimeError as e: if 'out of memory' not in str(e): raise RuntimeError(e) torch.cuda.empty_cache() for t in (t1, t2, t3, t4): t.append(float('inf')) ts = torch.tensor([t1, t2, t3, t4]) winner = torch.zeros_like(ts, dtype=torch.bool) winner[ts.argmin(dim=0), torch.arange(len(sizes))] = 1 winner = winner.tolist() name = f'{group}/{name}' print(f'{bold(name)} (avg row length: {avg_row_len:.2f}):') print('\t'.join([' '] + [f'{size:>5}' for size in sizes])) print('\t'.join([bold('SELECT ')] + [bold(f'{t:.5f}', f) for t, f in zip(t1, winner[0])])) print('\t'.join([bold('GAT ')] + [bold(f'{t:.5f}', f) for t, f in zip(t2, winner[1])])) print('\t'.join([bold('GAT_COO')] + [bold(f'{t:.5f}', f) for t, f in zip(t3, winner[2])])) print('\t'.join([bold('GAT_CSR')] + [bold(f'{t:.5f}', f) for t, f in zip(t4, winner[3])])) print() if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--with_backward', action='store_true') parser.add_argument('--device', type=str, default='cuda') args = parser.parse_args() iters = 1 if args.device == 'cpu' else 20 sizes = [1, 16, 32, 64, 128, 256, 512] sizes = sizes[:3] if args.device == 'cpu' else sizes for _ in range(10): # Warmup. torch.randn(100, 100, device=args.device).sum() for dataset in itertools.chain(short_rows, long_rows): download(dataset) correctness(dataset) timing(dataset) pytorch_scatter-2.1.2/benchmark/scatter_segment.py000066400000000000000000000174051450760104200224320ustar00rootroot00000000000000import time import os.path as osp import itertools import argparse import wget import torch from scipy.io import loadmat from torch_scatter import scatter, segment_coo, segment_csr short_rows = [ ('DIMACS10', 'citationCiteseer'), ('SNAP', 'web-Stanford'), ] long_rows = [ ('Janna', 'StocF-1465'), ('GHS_psdef', 'ldoor'), ] def download(dataset): url = 'https://sparse.tamu.edu/mat/{}/{}.mat' for group, name in itertools.chain(long_rows, short_rows): if not osp.exists(f'{name}.mat'): print(f'Downloading {group}/{name}:') wget.download(url.format(group, name)) print('') def bold(text, flag=True): return f'\033[1m{text}\033[0m' if flag else text @torch.no_grad() def correctness(dataset): group, name = dataset mat = loadmat(f'{name}.mat')['Problem'][0][0][2].tocsr() rowptr = torch.from_numpy(mat.indptr).to(args.device, torch.long) row = torch.from_numpy(mat.tocoo().row).to(args.device, torch.long) dim_size = rowptr.size(0) - 1 for size in sizes: try: x = torch.randn((row.size(0), size), device=args.device) x = x.squeeze(-1) if size == 1 else x out1 = scatter(x, row, dim=0, dim_size=dim_size, reduce='add') out2 = segment_coo(x, row, dim_size=dim_size, reduce='add') out3 = segment_csr(x, rowptr, reduce='add') assert torch.allclose(out1, out2, atol=1e-4) assert torch.allclose(out1, out3, atol=1e-4) out1 = scatter(x, row, dim=0, dim_size=dim_size, reduce='mean') out2 = segment_coo(x, row, dim_size=dim_size, reduce='mean') out3 = segment_csr(x, rowptr, reduce='mean') assert torch.allclose(out1, out2, atol=1e-4) assert torch.allclose(out1, out3, atol=1e-4) out1 = scatter(x, row, dim=0, dim_size=dim_size, reduce='min') out2 = segment_coo(x, row, reduce='min') out3 = segment_csr(x, rowptr, reduce='min') assert torch.allclose(out1, out2, atol=1e-4) assert torch.allclose(out1, out3, atol=1e-4) out1 = scatter(x, row, dim=0, dim_size=dim_size, reduce='max') out2 = segment_coo(x, row, reduce='max') out3 = segment_csr(x, rowptr, reduce='max') assert torch.allclose(out1, out2, atol=1e-4) assert torch.allclose(out1, out3, atol=1e-4) except RuntimeError as e: if 'out of memory' not in str(e): raise RuntimeError(e) torch.cuda.empty_cache() def time_func(func, x): try: if torch.cuda.is_available(): torch.cuda.synchronize() t = time.perf_counter() if not args.with_backward: with torch.no_grad(): for _ in range(iters): func(x) else: x = x.requires_grad_() for _ in range(iters): out = func(x) out = out[0] if isinstance(out, tuple) else out torch.autograd.grad(out, x, out, only_inputs=True) if torch.cuda.is_available(): torch.cuda.synchronize() return time.perf_counter() - t except RuntimeError as e: if 'out of memory' not in str(e): raise RuntimeError(e) torch.cuda.empty_cache() return float('inf') def timing(dataset): group, name = dataset mat = loadmat(f'{name}.mat')['Problem'][0][0][2].tocsr() rowptr = torch.from_numpy(mat.indptr).to(args.device, torch.long) row = torch.from_numpy(mat.tocoo().row).to(args.device, torch.long) row2 = row[torch.randperm(row.size(0))] dim_size = rowptr.size(0) - 1 avg_row_len = row.size(0) / dim_size def sca1_row(x): out = x.new_zeros(dim_size, *x.size()[1:]) row_tmp = row.view(-1, 1).expand_as(x) if x.dim() > 1 else row return out.scatter_add_(0, row_tmp, x) def sca1_col(x): out = x.new_zeros(dim_size, *x.size()[1:]) row2_tmp = row2.view(-1, 1).expand_as(x) if x.dim() > 1 else row2 return out.scatter_add_(0, row2_tmp, x) def sca2_row(x): return scatter(x, row, dim=0, dim_size=dim_size, reduce=args.reduce) def sca2_col(x): return scatter(x, row2, dim=0, dim_size=dim_size, reduce=args.reduce) def seg_coo(x): return segment_coo(x, row, reduce=args.reduce) def seg_csr(x): return segment_csr(x, rowptr, reduce=args.reduce) def dense1(x): return getattr(torch, args.reduce)(x, dim=-2) def dense2(x): return getattr(torch, args.reduce)(x, dim=-1) t1, t2, t3, t4, t5, t6, t7, t8 = [], [], [], [], [], [], [], [] for size in sizes: try: x = torch.randn((row.size(0), size), device=args.device) x = x.squeeze(-1) if size == 1 else x t1 += [time_func(sca1_row, x)] t2 += [time_func(sca1_col, x)] t3 += [time_func(sca2_row, x)] t4 += [time_func(sca2_col, x)] t5 += [time_func(seg_coo, x)] t6 += [time_func(seg_csr, x)] del x except RuntimeError as e: if 'out of memory' not in str(e): raise RuntimeError(e) torch.cuda.empty_cache() for t in (t1, t2, t3, t4, t5, t6): t.append(float('inf')) try: x = torch.randn((dim_size, int(avg_row_len + 1), size), device=args.device) t7 += [time_func(dense1, x)] x = x.view(dim_size, size, int(avg_row_len + 1)) t8 += [time_func(dense2, x)] del x except RuntimeError as e: if 'out of memory' not in str(e): raise RuntimeError(e) torch.cuda.empty_cache() for t in (t7, t8): t.append(float('inf')) ts = torch.tensor([t1, t2, t3, t4, t5, t6, t7, t8]) winner = torch.zeros_like(ts, dtype=torch.bool) winner[ts.argmin(dim=0), torch.arange(len(sizes))] = 1 winner = winner.tolist() name = f'{group}/{name}' print(f'{bold(name)} (avg row length: {avg_row_len:.2f}):') print('\t'.join([' '] + [f'{size:>5}' for size in sizes])) print('\t'.join([bold('SCA1_ROW')] + [bold(f'{t:.5f}', f) for t, f in zip(t1, winner[0])])) print('\t'.join([bold('SCA1_COL')] + [bold(f'{t:.5f}', f) for t, f in zip(t2, winner[1])])) print('\t'.join([bold('SCA2_ROW')] + [bold(f'{t:.5f}', f) for t, f in zip(t3, winner[2])])) print('\t'.join([bold('SCA2_COL')] + [bold(f'{t:.5f}', f) for t, f in zip(t4, winner[3])])) print('\t'.join([bold('SEG_COO ')] + [bold(f'{t:.5f}', f) for t, f in zip(t5, winner[4])])) print('\t'.join([bold('SEG_CSR ')] + [bold(f'{t:.5f}', f) for t, f in zip(t6, winner[5])])) print('\t'.join([bold('DENSE1 ')] + [bold(f'{t:.5f}', f) for t, f in zip(t7, winner[6])])) print('\t'.join([bold('DENSE2 ')] + [bold(f'{t:.5f}', f) for t, f in zip(t8, winner[7])])) print() if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--reduce', type=str, required=True, choices=['sum', 'mean', 'min', 'max']) parser.add_argument('--with_backward', action='store_true') parser.add_argument('--device', type=str, default='cuda') args = parser.parse_args() iters = 1 if args.device == 'cpu' else 20 sizes = [1, 16, 32, 64, 128, 256, 512] sizes = sizes[:3] if args.device == 'cpu' else sizes for _ in range(10): # Warmup. torch.randn(100, 100, device=args.device).sum() for dataset in itertools.chain(short_rows, long_rows): download(dataset) correctness(dataset) timing(dataset) pytorch_scatter-2.1.2/cmake/000077500000000000000000000000001450760104200160105ustar00rootroot00000000000000pytorch_scatter-2.1.2/cmake/TorchScatterConfig.cmake.in000066400000000000000000000020511450760104200231500ustar00rootroot00000000000000# TorchScatterConfig.cmake # -------------------- # # Exported targets:: Scatter # @PACKAGE_INIT@ set(PN TorchScatter) set(${PN}_INCLUDE_DIR "${PACKAGE_PREFIX_DIR}/@CMAKE_INSTALL_INCLUDEDIR@") set(${PN}_LIBRARY "") set(${PN}_DEFINITIONS USING_${PN}) check_required_components(${PN}) if(NOT (CMAKE_VERSION VERSION_LESS 3.0)) #----------------------------------------------------------------------------- # Don't include targets if this file is being picked up by another # project which has already built this as a subproject #----------------------------------------------------------------------------- if(NOT TARGET ${PN}::TorchScatter) include("${CMAKE_CURRENT_LIST_DIR}/${PN}Targets.cmake") if(NOT TARGET torch_library) find_package(Torch REQUIRED) endif() if(NOT TARGET Python3::Python) find_package(Python3 COMPONENTS Development) endif() target_link_libraries(TorchScatter::TorchScatter INTERFACE ${TORCH_LIBRARIES} Python3::Python) if(@WITH_CUDA@) target_compile_definitions(TorchScatter::TorchScatter INTERFACE WITH_CUDA) endif() endif() endif() pytorch_scatter-2.1.2/conda/000077500000000000000000000000001450760104200160145ustar00rootroot00000000000000pytorch_scatter-2.1.2/conda/pytorch-scatter/000077500000000000000000000000001450760104200211475ustar00rootroot00000000000000pytorch_scatter-2.1.2/conda/pytorch-scatter/README.md000066400000000000000000000001151450760104200224230ustar00rootroot00000000000000``` ./build_conda.sh 3.9 2.1.0 cu118 # python, pytorch and cuda version ``` pytorch_scatter-2.1.2/conda/pytorch-scatter/build_conda.sh000077500000000000000000000031671450760104200237600ustar00rootroot00000000000000#!/bin/bash export PYTHON_VERSION=$1 export TORCH_VERSION=$2 export CUDA_VERSION=$3 export CONDA_PYTORCH_CONSTRAINT="pytorch==${TORCH_VERSION%.*}.*" if [ "${CUDA_VERSION}" = "cpu" ]; then export CONDA_CUDATOOLKIT_CONSTRAINT="cpuonly # [not osx]" else case $CUDA_VERSION in cu121) export CONDA_CUDATOOLKIT_CONSTRAINT="pytorch-cuda==12.1.*" ;; cu118) export CONDA_CUDATOOLKIT_CONSTRAINT="pytorch-cuda==11.8.*" ;; cu117) export CONDA_CUDATOOLKIT_CONSTRAINT="pytorch-cuda==11.7.*" ;; cu116) if [ "${TORCH_VERSION}" = "1.12.0" ]; then export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit==11.6.*" else export CONDA_CUDATOOLKIT_CONSTRAINT="pytorch-cuda==11.6.*" fi ;; cu115) export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit==11.5.*" ;; cu113) export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit==11.3.*" ;; cu111) export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit==11.1.*" ;; cu102) export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit==10.2.*" ;; cu101) export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit==10.1.*" ;; *) echo "Unrecognized CUDA_VERSION=$CUDA_VERSION" exit 1 ;; esac fi echo "PyTorch $TORCH_VERSION+$CUDA_VERSION" echo "- $CONDA_PYTORCH_CONSTRAINT" echo "- $CONDA_CUDATOOLKIT_CONSTRAINT" if [ "${TORCH_VERSION}" = "1.12.0" ] && [ "${CUDA_VERSION}" = "cu116" ]; then conda build . -c pytorch -c default -c nvidia -c conda-forge --output-folder "$HOME/conda-bld" else conda build . -c pytorch -c default -c nvidia --output-folder "$HOME/conda-bld" fi pytorch_scatter-2.1.2/conda/pytorch-scatter/meta.yaml000066400000000000000000000015541450760104200227660ustar00rootroot00000000000000package: name: pytorch-scatter version: 2.1.2 source: path: ../.. requirements: build: - {{ compiler('c') }} # [win] host: - pip - python {{ environ.get('PYTHON_VERSION') }} - {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }} - {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }} run: - python {{ environ.get('PYTHON_VERSION') }} - {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }} - {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }} build: string: py{{ environ.get('PYTHON_VERSION').replace('.', '') }}_torch_{{ environ['TORCH_VERSION'] }}_{{ environ['CUDA_VERSION'] }} script: pip install . script_env: - FORCE_CUDA - TORCH_CUDA_ARCH_LIST test: imports: - torch_scatter about: home: https://github.com/rusty1s/pytorch_scatter license: MIT summary: PyTorch Extension Library of Optimized Scatter Operations pytorch_scatter-2.1.2/csrc/000077500000000000000000000000001450760104200156625ustar00rootroot00000000000000pytorch_scatter-2.1.2/csrc/cpu/000077500000000000000000000000001450760104200164515ustar00rootroot00000000000000pytorch_scatter-2.1.2/csrc/cpu/index_info.h000066400000000000000000000031671450760104200207530ustar00rootroot00000000000000#pragma once #include "../extensions.h" #define MAX_TENSORINFO_DIMS 25 template struct TensorInfo { TensorInfo(scalar_t *p, int dim, int sz[MAX_TENSORINFO_DIMS], int st[MAX_TENSORINFO_DIMS]) { data = p; dims = dim; AT_ASSERT(dims < MAX_TENSORINFO_DIMS); for (int i = 0; i < dim; ++i) { sizes[i] = sz[i]; strides[i] = st[i]; } } scalar_t *data; int dims; int sizes[MAX_TENSORINFO_DIMS]; int strides[MAX_TENSORINFO_DIMS]; }; template TensorInfo getTensorInfo(const torch::Tensor &tensor) { int sizes[MAX_TENSORINFO_DIMS]; int strides[MAX_TENSORINFO_DIMS]; int dims = tensor.dim(); for (int i = 0; i < dims; ++i) { sizes[i] = tensor.size(i); strides[i] = tensor.stride(i); } return TensorInfo(tensor.data_ptr(), dims, sizes, strides); } template struct IndexToOffset { static inline int get(int idx, const TensorInfo &info) { int offset = 0; for (int i = info.dims - 1; i >= 0; --i) { offset += (idx % info.sizes[i]) * info.strides[i]; idx /= info.sizes[i]; } return offset; } }; template struct IndexPtrToOffset { static inline int get(int idx, const TensorInfo &info) { int offset = idx % (info.sizes[info.dims - 1] - 1); offset *= info.strides[info.dims - 1]; idx /= info.sizes[info.dims - 1] - 1; for (int i = info.dims - 2; i >= 0; --i) { offset += (idx % info.sizes[i]) * info.strides[i]; idx /= info.sizes[i]; } return offset; } }; pytorch_scatter-2.1.2/csrc/cpu/reducer.h000066400000000000000000000074121450760104200202570ustar00rootroot00000000000000#pragma once #include #include enum ReductionType { SUM, MEAN, MUL, DIV, MIN, MAX }; const std::map reduce2REDUCE = { {"sum", SUM}, {"mean", MEAN}, {"mul", MUL}, {"div", DIV}, {"min", MIN}, {"max", MAX}, }; #define AT_DISPATCH_REDUCTION_TYPES(reduce, ...) \ [&] { \ switch (reduce2REDUCE.at(reduce)) { \ case SUM: { \ static constexpr ReductionType REDUCE = SUM; \ return __VA_ARGS__(); \ } \ case MEAN: { \ static constexpr ReductionType REDUCE = MEAN; \ return __VA_ARGS__(); \ } \ case MUL: { \ static constexpr ReductionType REDUCE = MUL; \ return __VA_ARGS__(); \ } \ case DIV: { \ static constexpr ReductionType REDUCE = DIV; \ return __VA_ARGS__(); \ } \ case MIN: { \ static constexpr ReductionType REDUCE = MIN; \ return __VA_ARGS__(); \ } \ case MAX: { \ static constexpr ReductionType REDUCE = MAX; \ return __VA_ARGS__(); \ } \ } \ }() template struct Reducer { static inline scalar_t init() { if (REDUCE == MUL || REDUCE == DIV) return (scalar_t)1; else if (REDUCE == MIN) return std::numeric_limits::max(); else if (REDUCE == MAX) return std::numeric_limits::lowest(); else return (scalar_t)0; } static inline void update(scalar_t *val, scalar_t new_val, int64_t *arg, int64_t new_arg) { if (REDUCE == SUM || REDUCE == MEAN) *val = *val + new_val; else if (REDUCE == MUL) *val = *val * new_val; else if (REDUCE == DIV) *val = *val / new_val; else if ((REDUCE == MIN && new_val < *val) || (REDUCE == MAX && new_val > *val)) { *val = new_val; *arg = new_arg; } } static inline void write(scalar_t *address, scalar_t val, int64_t *arg_address, int64_t arg, int count) { if (REDUCE == SUM || REDUCE == MUL || REDUCE == DIV) *address = val; else if (REDUCE == MEAN) *address = val / (scalar_t)(count > 0 ? count : 1); else if (REDUCE == MIN || REDUCE == MAX) { if (count > 0) { *address = val; *arg_address = arg; } else *address = (scalar_t)0; } } }; pytorch_scatter-2.1.2/csrc/cpu/scatter_cpu.cpp000066400000000000000000000053051450760104200214740ustar00rootroot00000000000000#include "scatter_cpu.h" #include "index_info.h" #include "reducer.h" #include "utils.h" std::tuple> scatter_cpu(torch::Tensor src, torch::Tensor index, int64_t dim, torch::optional optional_out, torch::optional dim_size, std::string reduce) { CHECK_CPU(src); CHECK_CPU(index); if (optional_out.has_value()) CHECK_CPU(optional_out.value()); CHECK_INPUT(src.dim() == index.dim()); for (auto i = 0; i < index.dim() - 1; i++) CHECK_INPUT(src.size(i) >= index.size(i)); src = src.contiguous(); torch::Tensor out; if (optional_out.has_value()) { out = optional_out.value().contiguous(); for (auto i = 0; i < out.dim(); i++) if (i != dim) CHECK_INPUT(src.size(i) == out.size(i)); } else { auto sizes = src.sizes().vec(); if (dim_size.has_value()) sizes[dim] = dim_size.value(); else if (index.numel() == 0) sizes[dim] = 0; else sizes[dim] = 1 + *index.max().data_ptr(); out = torch::empty(sizes, src.options()); } torch::optional arg_out = torch::nullopt; int64_t *arg_out_data = nullptr; if (reduce2REDUCE.at(reduce) == MIN || reduce2REDUCE.at(reduce) == MAX) { arg_out = torch::full_like(out, src.size(dim), index.options()); arg_out_data = arg_out.value().data_ptr(); } if (src.numel() == 0) { if (!optional_out.has_value()) out.fill_(0); return std::make_tuple(out, arg_out); } auto B = 1; for (auto i = 0; i < dim; i++) B *= src.size(i); auto E = src.size(dim); auto K = src.numel() / (B * E); auto N = out.size(dim); auto index_info = getTensorInfo(index); AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, src.scalar_type(), "scatter_cpu", [&] { auto src_data = src.data_ptr(); auto out_data = out.data_ptr(); int64_t i, idx; AT_DISPATCH_REDUCTION_TYPES(reduce, [&] { if (!optional_out.has_value()) out.fill_(Reducer::init()); for (auto b = 0; b < B; b++) { for (auto e = 0; e < E; e++) { for (auto k = 0; k < K; k++) { i = b * E * K + e * K + k; idx = index_info.data[IndexToOffset::get(i, index_info)]; Reducer::update( out_data + b * N * K + idx * K + k, src_data[i], arg_out_data + b * N * K + idx * K + k, e); } } } if (!optional_out.has_value() && (REDUCE == MIN || REDUCE == MAX)) out.masked_fill_(out == Reducer::init(), (scalar_t)0); }); }); return std::make_tuple(out, arg_out); } pytorch_scatter-2.1.2/csrc/cpu/scatter_cpu.h000066400000000000000000000004421450760104200211360ustar00rootroot00000000000000#pragma once #include "../extensions.h" std::tuple> scatter_cpu(torch::Tensor src, torch::Tensor index, int64_t dim, torch::optional optional_out, torch::optional dim_size, std::string reduce); pytorch_scatter-2.1.2/csrc/cpu/segment_coo_cpu.cpp000066400000000000000000000154261450760104200223360ustar00rootroot00000000000000#include "segment_coo_cpu.h" #include "index_info.h" #include "reducer.h" #include "utils.h" #include std::tuple> segment_coo_cpu(torch::Tensor src, torch::Tensor index, torch::optional optional_out, torch::optional dim_size, std::string reduce) { CHECK_CPU(src); CHECK_CPU(index); if (optional_out.has_value()) CHECK_CPU(optional_out.value()); CHECK_INPUT(src.dim() >= index.dim()); auto sizes = index.sizes().vec(); for (auto i = 0; i < index.dim(); i++) sizes[i] = src.size(i); index = index.expand(sizes); auto dim = index.dim() - 1; src = src.contiguous(); torch::Tensor out; if (optional_out.has_value()) { out = optional_out.value().contiguous(); for (auto i = 0; i < out.dim(); i++) if (i != dim) CHECK_INPUT(src.size(i) == out.size(i)); } else { sizes = src.sizes().vec(); if (dim_size.has_value()) sizes[dim] = dim_size.value(); else if (index.numel() == 0) sizes[dim] = 0; else { auto tmp = index.select(dim, index.size(dim) - 1); tmp = tmp.numel() > 1 ? tmp.max() : tmp; sizes[dim] = 1 + *tmp.data_ptr(); } out = torch::empty(sizes, src.options()); } torch::optional arg_out = torch::nullopt; int64_t *arg_out_data = nullptr; if (reduce2REDUCE.at(reduce) == MIN || reduce2REDUCE.at(reduce) == MAX) { arg_out = torch::full_like(out, src.size(dim), index.options()); arg_out_data = arg_out.value().data_ptr(); } else if (reduce2REDUCE.at(reduce) == MEAN) { auto sizes = index.sizes().vec(); sizes[dim] = out.size(dim); arg_out = torch::zeros(sizes, out.options()); } if (src.numel() == 0) { if (!optional_out.has_value()) out.fill_(0); return std::make_tuple(out, arg_out); } auto B = index.numel() / src.size(dim); auto E = src.size(dim); auto K = src.numel() / index.numel(); auto N = out.size(dim); auto index_info = getTensorInfo(index); auto stride = index_info.strides[index_info.dims - 1]; std::vector args(K); AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, src.scalar_type(), "segment_coo_cpu", [&] { using opmath_t = at::opmath_type; auto src_data = src.data_ptr(); auto out_data = out.data_ptr(); scalar_t *count_data = nullptr; std::vector vals(K); int64_t idx, next_idx, row_start; AT_DISPATCH_REDUCTION_TYPES(reduce, [&] { if (!optional_out.has_value()) out.fill_(Reducer::init()); if (REDUCE == MEAN) count_data = arg_out.value().data_ptr(); for (auto b = 0; b < B; b++) { auto offset = IndexToOffset::get(b * E, index_info); idx = index_info.data[offset]; for (auto k = 0; k < K; k++) vals[k] = static_cast(out_data[b * N * K + k]); row_start = 0; for (auto e = 0; e < E; e++) { for (auto k = 0; k < K; k++) Reducer::update( &vals[k], static_cast(src_data[b * E * K + e * K + k]), &args[k], e); if (e == E - 1) { for (auto k = 0; k < K; k++) Reducer::write( out_data + b * N * K + idx * K + k, static_cast(vals[k]), arg_out_data + b * N * K + idx * K + k, args[k], e + 1 - row_start); if (REDUCE == MEAN) count_data[b * N + idx] = (scalar_t)(e + 1 - row_start); } else { next_idx = index_info.data[offset + (e + 1) * stride]; assert(idx <= next_idx); if (idx != next_idx) { for (auto k = 0; k < K; k++) { Reducer::write( out_data + b * N * K + idx * K + k, static_cast(vals[k]), arg_out_data + b * N * K + idx * K + k, args[k], e + 1 - row_start); vals[k] = static_cast(out_data[b * N * K + next_idx * K + k]); } if (REDUCE == MEAN) count_data[b * N + idx] = (scalar_t)(e + 1 - row_start); row_start = e + 1; } idx = next_idx; } } } if (!optional_out.has_value() && (REDUCE == MIN || REDUCE == MAX)) out.masked_fill_(out == Reducer::init(), (scalar_t)0); if (REDUCE == MEAN) arg_out.value().masked_fill_(arg_out.value() < (scalar_t)1, (scalar_t)1); }); }); return std::make_tuple(out, arg_out); } torch::Tensor gather_coo_cpu(torch::Tensor src, torch::Tensor index, torch::optional optional_out) { CHECK_CPU(src); CHECK_CPU(index); if (optional_out.has_value()) CHECK_CPU(optional_out.value()); CHECK_INPUT(src.dim() >= index.dim()); for (auto i = 0; i < index.dim() - 1; i++) CHECK_INPUT(src.size(i) == index.size(i)); auto dim = index.dim() - 1; src = src.contiguous(); torch::Tensor out; if (optional_out.has_value()) { out = optional_out.value().contiguous(); for (auto i = 0; i < src.dim(); i++) if (i != dim) CHECK_INPUT(src.size(i) == out.size(i)); } else { auto sizes = src.sizes().vec(); sizes[dim] = index.size(dim); out = torch::empty(sizes, src.options()); } if (src.numel() == 0) { if (!optional_out.has_value()) out.fill_(0); return out; } auto B = index.numel() / out.size(dim); auto E = index.size(dim); auto K = out.numel() / index.numel(); auto N = src.size(dim); auto index_info = getTensorInfo(index); auto stride = index_info.strides[index_info.dims - 1]; AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, src.scalar_type(), "gather_coo_cpu", [&] { auto src_data = src.data_ptr(); auto out_data = out.data_ptr(); std::vector vals(K); int64_t idx, next_idx; for (auto b = 0; b < B; b++) { auto offset = IndexToOffset::get(b * E, index_info); idx = index_info.data[offset]; for (auto k = 0; k < K; k++) vals[k] = src_data[b * N * K + idx * K + k]; for (auto e = 0; e < E; e++) { for (auto k = 0; k < K; k++) out_data[b * E * K + e * K + k] = vals[k]; if (e < E - 1) { next_idx = index_info.data[offset + (e + 1) * stride]; CHECK_INPUT(idx <= next_idx); if (idx != next_idx) { idx = next_idx; for (auto k = 0; k < K; k++) vals[k] = src_data[b * N * K + idx * K + k]; } } } } }); return out; } pytorch_scatter-2.1.2/csrc/cpu/segment_coo_cpu.h000066400000000000000000000006621450760104200217770ustar00rootroot00000000000000#pragma once #include "../extensions.h" std::tuple> segment_coo_cpu(torch::Tensor src, torch::Tensor index, torch::optional optional_out, torch::optional dim_size, std::string reduce); torch::Tensor gather_coo_cpu(torch::Tensor src, torch::Tensor index, torch::optional optional_out); pytorch_scatter-2.1.2/csrc/cpu/segment_csr_cpu.cpp000066400000000000000000000121661450760104200223430ustar00rootroot00000000000000#include "segment_csr_cpu.h" #include "index_info.h" #include "reducer.h" #include "utils.h" #include std::tuple> segment_csr_cpu(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out, std::string reduce) { CHECK_CPU(src); CHECK_CPU(indptr); if (optional_out.has_value()) CHECK_CPU(optional_out.value()); CHECK_INPUT(src.dim() >= indptr.dim()); auto sizes = indptr.sizes().vec(); for (auto i = 0; i < indptr.dim() - 1; i++) sizes[i] = src.size(i); indptr = indptr.expand(sizes); auto dim = indptr.dim() - 1; src = src.contiguous(); torch::Tensor out; if (optional_out.has_value()) { out = optional_out.value().contiguous(); for (auto i = 0; i < out.dim(); i++) if (i != dim) CHECK_INPUT(src.size(i) == out.size(i)); CHECK_INPUT(src.numel() == 0 || out.size(dim) == indptr.size(dim) - 1); } else { sizes = src.sizes().vec(); sizes[dim] = std::max(indptr.size(dim) - 1, 0); out = torch::empty(sizes, src.options()); } torch::optional arg_out = torch::nullopt; int64_t *arg_out_data = nullptr; if (reduce2REDUCE.at(reduce) == MIN || reduce2REDUCE.at(reduce) == MAX) { arg_out = torch::full(out.sizes(), src.size(dim), indptr.options()); arg_out_data = arg_out.value().data_ptr(); } if (src.numel() == 0) { if (!optional_out.has_value()) out.fill_(0); return std::make_tuple(out, arg_out); } auto N = out.size(dim) * (indptr.numel() / indptr.size(-1)); auto K = out.numel() / N; auto E = src.size(dim); auto indptr_info = getTensorInfo(indptr); auto stride = indptr_info.strides[indptr_info.dims - 1]; std::vector args(K); AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, src.scalar_type(), "segment_csr_cpu", [&] { using opmath_t = at::opmath_type; auto src_data = src.data_ptr(); auto out_data = out.data_ptr(); std::vector vals(K); int64_t row_start, row_end; AT_DISPATCH_REDUCTION_TYPES(reduce, [&] { for (auto n = 0; n < N; n++) { auto offset = IndexPtrToOffset::get(n, indptr_info); row_start = indptr_info.data[offset]; row_end = indptr_info.data[offset + stride]; offset = (n / (indptr.size(-1) - 1)) * E * K; for (auto k = 0; k < K; k++) vals[k] = Reducer::init(); for (auto e = row_start; e < row_end; e++) for (auto k = 0; k < K; k++) Reducer::update( &vals[k], static_cast(src_data[offset + e * K + k]), &args[k], e); for (auto k = 0; k < K; k++) Reducer::write(out_data + n * K + k, static_cast(vals[k]), arg_out_data + n * K + k, args[k], row_end - row_start); } }); }); return std::make_tuple(out, arg_out); } torch::Tensor gather_csr_cpu(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out) { CHECK_CPU(src); CHECK_CPU(indptr); if (optional_out.has_value()) CHECK_CPU(optional_out.value()); CHECK_INPUT(src.dim() >= indptr.dim()); auto sizes = indptr.sizes().vec(); for (auto i = 0; i < indptr.dim() - 1; i++) sizes[i] = src.size(i); indptr = indptr.expand(sizes); auto dim = indptr.dim() - 1; CHECK_INPUT(src.size(dim) == 0 || src.size(dim) == indptr.size(dim) - 1); src = src.contiguous(); torch::Tensor out; if (optional_out.has_value()) { out = optional_out.value().contiguous(); for (auto i = 0; i < out.dim(); i++) if (i != dim) CHECK_INPUT(src.size(i) == out.size(i)); } else { auto sizes = src.sizes().vec(); if (src.numel() > 0) sizes[dim] = *indptr.flatten()[-1].data_ptr(); else sizes[dim] = 0; out = torch::empty(sizes, src.options()); } if (src.numel() == 0) { if (!optional_out.has_value()) out.fill_(0); return out; } auto N = src.size(dim) * (indptr.numel() / indptr.size(-1)); auto K = src.numel() / N; auto E = out.size(dim); auto indptr_info = getTensorInfo(indptr); auto stride = indptr_info.strides[indptr_info.dims - 1]; AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, src.scalar_type(), "gather_csr_cpu", [&] { auto src_data = src.data_ptr(); auto out_data = out.data_ptr(); std::vector vals(K); int64_t row_start, row_end; for (auto n = 0; n < N; n++) { auto offset = IndexPtrToOffset::get(n, indptr_info); row_start = indptr_info.data[offset]; row_end = indptr_info.data[offset + stride]; for (auto k = 0; k < K; k++) vals[k] = src_data[n * K + k]; offset = (n / (indptr.size(-1) - 1)) * E * K; for (auto e = row_start; e < row_end; e++) for (auto k = 0; k < K; k++) out_data[offset + e * K + k] = vals[k]; } }); return out; } pytorch_scatter-2.1.2/csrc/cpu/segment_csr_cpu.h000066400000000000000000000006211450760104200220010ustar00rootroot00000000000000#pragma once #include "../extensions.h" std::tuple> segment_csr_cpu(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out, std::string reduce); torch::Tensor gather_csr_cpu(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out); pytorch_scatter-2.1.2/csrc/cpu/utils.h000066400000000000000000000002601450760104200177600ustar00rootroot00000000000000#pragma once #include "../extensions.h" #define CHECK_CPU(x) AT_ASSERTM(x.device().is_cpu(), #x " must be CPU tensor") #define CHECK_INPUT(x) AT_ASSERTM(x, "Input mismatch") pytorch_scatter-2.1.2/csrc/cuda/000077500000000000000000000000001450760104200165765ustar00rootroot00000000000000pytorch_scatter-2.1.2/csrc/cuda/atomics.cuh000066400000000000000000000424211450760104200207410ustar00rootroot00000000000000#pragma once #define ATOMIC(NAME) \ template struct Atomic##NAME##IntegerImpl; \ \ template struct Atomic##NAME##IntegerImpl { \ inline __device__ void operator()(scalar *address, scalar val) { \ uint32_t *address_as_ui = (uint32_t *)(address - ((size_t)address & 3)); \ uint32_t old = *address_as_ui; \ uint32_t shift = ((size_t)address & 3) * 8; \ uint32_t sum; \ uint32_t assumed; \ \ do { \ assumed = old; \ sum = OP(val, scalar((old >> shift) & 0xff)); \ old = (old & ~(0x000000ff << shift)) | (sum << shift); \ old = atomicCAS(address_as_ui, assumed, old); \ } while (assumed != old); \ } \ }; \ \ template struct Atomic##NAME##IntegerImpl { \ inline __device__ void operator()(scalar *address, scalar val) { \ uint32_t *address_as_ui = \ (uint32_t *)((char *)address - ((size_t)address & 2)); \ uint32_t old = *address_as_ui; \ uint32_t sum; \ uint32_t newval; \ uint32_t assumed; \ \ do { \ assumed = old; \ sum = OP(val, (size_t)address & 2 ? scalar(old >> 16) \ : scalar(old & 0xffff)); \ newval = (size_t)address & 2 ? (old & 0xffff) | (sum << 16) \ : (old & 0xffff0000) | sum; \ old = atomicCAS(address_as_ui, assumed, newval); \ } while (assumed != old); \ } \ }; \ \ template struct Atomic##NAME##IntegerImpl { \ inline __device__ void operator()(scalar *address, scalar val) { \ uint32_t *address_as_ui = (uint32_t *)address; \ uint32_t old = *address_as_ui; \ uint32_t assumed; \ \ do { \ assumed = old; \ old = atomicCAS(address_as_ui, assumed, OP(val, (scalar)old)); \ } while (assumed != old); \ } \ }; \ \ template struct Atomic##NAME##IntegerImpl { \ inline __device__ void operator()(scalar *address, scalar val) { \ unsigned long long *address_as_ull = (unsigned long long *)address; \ unsigned long long old = *address_as_ull; \ unsigned long long assumed; \ \ do { \ assumed = old; \ old = atomicCAS(address_as_ull, assumed, OP(val, (scalar)old)); \ } while (assumed != old); \ } \ }; \ \ template struct Atomic##NAME##DecimalImpl; \ \ template <> struct Atomic##NAME##DecimalImpl { \ inline __device__ void operator()(at::Half *address, at::Half val) { \ unsigned int *address_as_ui = \ (unsigned int *)((char *)address - ((size_t)address & 2)); \ unsigned int old = *address_as_ui; \ unsigned int assumed; \ \ do { \ assumed = old; \ at::Half hsum; \ hsum.x = (size_t)address & 2 ? (old >> 16) : (old & 0xffff); \ hsum = OP(hsum, val); \ old = (size_t)address & 2 ? (old & 0xffff) | (hsum.x << 16) \ : (old & 0xffff0000) | hsum.x; \ old = atomicCAS(address_as_ui, assumed, old); \ } while (assumed != old); \ } \ }; \ \ template <> struct Atomic##NAME##DecimalImpl { \ inline __device__ void operator()(at::BFloat16 *address, at::BFloat16 val){\ unsigned int *address_as_ui = \ (unsigned int *)((char *)address - ((size_t)address & 2)); \ unsigned int old = *address_as_ui; \ unsigned int assumed; \ \ do { \ assumed = old; \ at::BFloat16 hsum; \ hsum.x = (size_t)address & 2 ? (old >> 16) : (old & 0xffff); \ hsum = OP(hsum, val); \ old = (size_t)address & 2 ? (old & 0xffff) | (hsum.x << 16) \ : (old & 0xffff0000) | hsum.x; \ old = atomicCAS(address_as_ui, assumed, old); \ } while (assumed != old); \ } \ }; \ \ template struct Atomic##NAME##DecimalImpl { \ inline __device__ void operator()(scalar *address, scalar val) { \ int *address_as_i = (int *)address; \ int old = *address_as_i; \ int assumed; \ \ do { \ assumed = old; \ old = atomicCAS(address_as_i, assumed, \ __float_as_int(OP(val, __int_as_float(assumed)))); \ } while (assumed != old); \ } \ }; \ \ template struct Atomic##NAME##DecimalImpl { \ inline __device__ void operator()(scalar *address, scalar val) { \ unsigned long long int *address_as_ull = \ (unsigned long long int *)address; \ unsigned long long int old = *address_as_ull; \ unsigned long long int assumed; \ \ do { \ assumed = old; \ old = atomicCAS( \ address_as_ull, assumed, \ __double_as_longlong(OP(val, __longlong_as_double(assumed)))); \ } while (assumed != old); \ } \ }; #define OP(X, Y) Y + X ATOMIC(Add) #undef OP static inline __device__ void atomAdd(uint8_t *address, uint8_t val) { AtomicAddIntegerImpl()(address, val); } static inline __device__ void atomAdd(int8_t *address, int8_t val) { AtomicAddIntegerImpl()(address, val); } static inline __device__ void atomAdd(int16_t *address, int16_t val) { AtomicAddIntegerImpl()(address, val); } static inline __device__ void atomAdd(int32_t *address, int32_t val) { atomicAdd(address, val); } static inline __device__ void atomAdd(int64_t *address, int64_t val) { AtomicAddIntegerImpl()(address, val); } #if defined(USE_ROCM) || (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 700 || CUDA_VERSION < 10000)) static inline __device__ void atomAdd(at::Half *address, at::Half val) { AtomicAddDecimalImpl()(address, val); } #else static inline __device__ void atomAdd(at::Half *address, at::Half val) { atomicAdd(reinterpret_cast<__half *>(address), val); } #endif static inline __device__ void atomAdd(float *address, float val) { atomicAdd(address, val); } #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600 || CUDA_VERSION < 8000) static inline __device__ void atomAdd(double *address, double val) { AtomicAddDecimalImpl()(address, val); } #else static inline __device__ void atomAdd(double *address, double val) { atomicAdd(address, val); } #endif static inline __device__ void atomAdd(at::BFloat16 *address, at::BFloat16 val) { AtomicAddDecimalImpl()(address, val); } #define OP(X, Y) Y *X ATOMIC(Mul) #undef OP static inline __device__ void atomMul(uint8_t *address, uint8_t val) { AtomicMulIntegerImpl()(address, val); } static inline __device__ void atomMul(int8_t *address, int8_t val) { AtomicMulIntegerImpl()(address, val); } static inline __device__ void atomMul(int16_t *address, int16_t val) { AtomicMulIntegerImpl()(address, val); } static inline __device__ void atomMul(int32_t *address, int32_t val) { AtomicMulIntegerImpl()(address, val); } static inline __device__ void atomMul(int64_t *address, int64_t val) { AtomicMulIntegerImpl()(address, val); } static inline __device__ void atomMul(float *address, float val) { AtomicMulDecimalImpl()(address, val); } static inline __device__ void atomMul(at::Half *address, at::Half val) { AtomicMulDecimalImpl()(address, val); } static inline __device__ void atomMul(double *address, double val) { AtomicMulDecimalImpl()(address, val); } static inline __device__ void atomMul(at::BFloat16 *address, at::BFloat16 val) { AtomicMulDecimalImpl()(address, val); } #define OP(X, Y) Y / X ATOMIC(Div) #undef OP static inline __device__ void atomDiv(uint8_t *address, uint8_t val) { AtomicDivIntegerImpl()(address, val); } static inline __device__ void atomDiv(int8_t *address, int8_t val) { AtomicDivIntegerImpl()(address, val); } static inline __device__ void atomDiv(int16_t *address, int16_t val) { AtomicDivIntegerImpl()(address, val); } static inline __device__ void atomDiv(int32_t *address, int32_t val) { AtomicDivIntegerImpl()(address, val); } static inline __device__ void atomDiv(int64_t *address, int64_t val) { AtomicDivIntegerImpl()(address, val); } static inline __device__ void atomDiv(at::Half *address, at::Half val) { AtomicDivDecimalImpl()(address, val); } static inline __device__ void atomDiv(float *address, float val) { AtomicDivDecimalImpl()(address, val); } static inline __device__ void atomDiv(double *address, double val) { AtomicDivDecimalImpl()(address, val); } static inline __device__ void atomDiv(at::BFloat16 *address, at::BFloat16 val) { AtomicDivDecimalImpl()(address, val); } #define OP(X, Y) max(Y, X) ATOMIC(Max) #undef OP static inline __device__ void atomMax(uint8_t *address, uint8_t val) { AtomicMaxIntegerImpl()(address, val); } static inline __device__ void atomMax(int8_t *address, int8_t val) { AtomicMaxIntegerImpl()(address, val); } static inline __device__ void atomMax(int16_t *address, int16_t val) { AtomicMaxIntegerImpl()(address, val); } static inline __device__ void atomMax(int32_t *address, int32_t val) { atomicMax(address, val); } static inline __device__ void atomMax(int64_t *address, int64_t val) { AtomicMaxIntegerImpl()(address, val); } static inline __device__ void atomMax(at::Half *address, at::Half val) { AtomicMaxDecimalImpl()(address, val); } static inline __device__ void atomMax(float *address, float val) { AtomicMaxDecimalImpl()(address, val); } static inline __device__ void atomMax(double *address, double val) { AtomicMaxDecimalImpl()(address, val); } static inline __device__ void atomMax(at::BFloat16 *address, at::BFloat16 val) { AtomicMaxDecimalImpl()(address, val); } #define OP(X, Y) min(Y, X) ATOMIC(Min) #undef OP static inline __device__ void atomMin(uint8_t *address, uint8_t val) { AtomicMinIntegerImpl()(address, val); } static inline __device__ void atomMin(int8_t *address, int8_t val) { AtomicMinIntegerImpl()(address, val); } static inline __device__ void atomMin(int16_t *address, int16_t val) { AtomicMinIntegerImpl()(address, val); } static inline __device__ void atomMin(int32_t *address, int32_t val) { atomicMin(address, val); } static inline __device__ void atomMin(int64_t *address, int64_t val) { AtomicMinIntegerImpl()(address, val); } static inline __device__ void atomMin(at::Half *address, at::Half val) { AtomicMinDecimalImpl()(address, val); } static inline __device__ void atomMin(float *address, float val) { AtomicMinDecimalImpl()(address, val); } static inline __device__ void atomMin(double *address, double val) { AtomicMinDecimalImpl()(address, val); } static inline __device__ void atomMin(at::BFloat16 *address, at::BFloat16 val) { AtomicMinDecimalImpl()(address, val); } pytorch_scatter-2.1.2/csrc/cuda/index_info.cuh000066400000000000000000000012141450760104200214170ustar00rootroot00000000000000#pragma once #include // We need our own `IndexToOffset` implementation since we do not want to // access the last element of the `indexptr`. template struct IndexPtrToOffset { static inline __host__ __device__ int get(int idx, const at::cuda::detail::TensorInfo &info) { int offset = idx % (info.sizes[info.dims - 1] - 1); offset *= info.strides[info.dims - 1]; idx /= info.sizes[info.dims - 1] - 1; for (int i = info.dims - 2; i >= 0; --i) { offset += (idx % info.sizes[i]) * info.strides[i]; idx /= info.sizes[i]; } return offset; } }; pytorch_scatter-2.1.2/csrc/cuda/reducer.cuh000066400000000000000000000114661450760104200207400ustar00rootroot00000000000000#pragma once #include #include #include "atomics.cuh" enum ReductionType { SUM, MEAN, MUL, DIV, MIN, MAX }; const std::map reduce2REDUCE = { {"sum", SUM}, {"mean", MEAN}, {"mul", MUL}, {"div", DIV}, {"min", MIN}, {"max", MAX}, }; #define AT_DISPATCH_REDUCTION_TYPES(reduce, ...) \ [&] { \ switch (reduce2REDUCE.at(reduce)) { \ case SUM: { \ static constexpr ReductionType REDUCE = SUM; \ return __VA_ARGS__(); \ } \ case MEAN: { \ static constexpr ReductionType REDUCE = MEAN; \ return __VA_ARGS__(); \ } \ case MUL: { \ static constexpr ReductionType REDUCE = MUL; \ return __VA_ARGS__(); \ } \ case DIV: { \ static constexpr ReductionType REDUCE = DIV; \ return __VA_ARGS__(); \ } \ case MIN: { \ static constexpr ReductionType REDUCE = MIN; \ return __VA_ARGS__(); \ } \ case MAX: { \ static constexpr ReductionType REDUCE = MAX; \ return __VA_ARGS__(); \ } \ } \ }() template struct Reducer { static inline __host__ __device__ scalar_t init() { if (REDUCE == MUL || REDUCE == DIV) return (scalar_t)1; else if (REDUCE == MIN) return std::numeric_limits::max(); else if (REDUCE == MAX) return std::numeric_limits::lowest(); else return (scalar_t)0; } static inline __host__ __device__ void update(scalar_t *val, scalar_t new_val) { if (REDUCE == SUM || REDUCE == MEAN) *val = *val + new_val; else if (REDUCE == MUL) *val = *val * new_val; else if (REDUCE == DIV) *val = *val / new_val; else if ((REDUCE == MIN && new_val < *val) || (REDUCE == MAX && new_val > *val)) { *val = new_val; } } static inline __host__ __device__ void update(scalar_t *val, scalar_t new_val, int64_t *arg, int64_t new_arg) { if (REDUCE == SUM || REDUCE == MEAN) *val = *val + new_val; else if (REDUCE == MUL) *val = *val * new_val; else if (REDUCE == DIV) *val = *val / new_val; else if ((REDUCE == MIN && new_val < *val) || (REDUCE == MAX && new_val > *val)) { *val = new_val; *arg = new_arg; } } static inline __host__ __device__ void write(scalar_t *address, scalar_t val, int64_t *arg_address, int64_t arg, int count) { if (REDUCE == SUM || REDUCE == MUL || REDUCE == DIV) *address = val; else if (REDUCE == MEAN) *address = val / (scalar_t)(count > 0 ? count : 1); else if (REDUCE == MIN || REDUCE == MAX) { if (count > 0) { *address = val; *arg_address = arg; } else *address = (scalar_t)0; } } static inline __device__ void atomic_write(scalar_t *address, scalar_t val) { if (REDUCE == SUM || REDUCE == MEAN) atomAdd(address, val); else if (REDUCE == MUL) atomMul(address, val); else if (REDUCE == DIV) atomDiv(address, val); else if (REDUCE == MIN) atomMin(address, val); else if (REDUCE == MAX) atomMax(address, val); } }; pytorch_scatter-2.1.2/csrc/cuda/scatter_cuda.cu000066400000000000000000000105421450760104200215720ustar00rootroot00000000000000#include "scatter_cuda.h" #include #include #include #include "reducer.cuh" #include "utils.cuh" #define THREADS 256 #define BLOCKS(N) (N + THREADS - 1) / THREADS template __global__ void scatter_kernel(const scalar_t *src_data, const at::cuda::detail::TensorInfo index_info, scalar_t *out_data, int E, int K, int N, int numel) { int thread_idx = blockIdx.x * blockDim.x + threadIdx.x; int b = thread_idx / (E * K); int k = thread_idx % K; if (thread_idx < numel) { int offset = at::cuda::detail::IndexToOffset::get( thread_idx, index_info); int64_t idx = index_info.data[offset]; Reducer::atomic_write(out_data + b * N * K + idx * K + k, src_data[thread_idx]); } } template __global__ void scatter_arg_kernel(const scalar_t *src_data, const at::cuda::detail::TensorInfo index_info, const scalar_t *out_data, int64_t *arg_out_data, int E, int K, int N, int numel) { int thread_idx = blockIdx.x * blockDim.x + threadIdx.x; int b = thread_idx / (E * K); int e = (thread_idx / K) % E; int k = thread_idx % K; if (thread_idx < numel) { int offset = at::cuda::detail::IndexToOffset::get( thread_idx, index_info); int64_t idx = index_info.data[offset]; if (src_data[thread_idx] == out_data[b * N * K + idx * K + k]) { arg_out_data[b * N * K + idx * K + k] = e; } } } std::tuple> scatter_cuda(torch::Tensor src, torch::Tensor index, int64_t dim, torch::optional optional_out, torch::optional dim_size, std::string reduce) { CHECK_CUDA(src); CHECK_CUDA(index); if (optional_out.has_value()) CHECK_CUDA(optional_out.value()); cudaSetDevice(src.get_device()); CHECK_INPUT(src.dim() == index.dim()); for (auto i = 0; i < index.dim() - 1; i++) CHECK_INPUT(src.size(i) >= index.size(i)); src = src.contiguous(); torch::Tensor out; if (optional_out.has_value()) { out = optional_out.value().contiguous(); for (auto i = 0; i < out.dim(); i++) if (i != dim) CHECK_INPUT(src.size(i) == out.size(i)); } else { auto sizes = src.sizes().vec(); if (dim_size.has_value()) sizes[dim] = dim_size.value(); else if (index.numel() == 0) sizes[dim] = 0; else { sizes[dim] = 1 + index.max().cpu().data_ptr()[0]; } out = torch::empty(sizes, src.options()); } torch::optional arg_out = torch::nullopt; int64_t *arg_out_data = nullptr; if (reduce2REDUCE.at(reduce) == MIN || reduce2REDUCE.at(reduce) == MAX) { arg_out = torch::full_like(out, src.size(dim), index.options()); arg_out_data = arg_out.value().data_ptr(); } if (src.numel() == 0) { if (!optional_out.has_value()) out.fill_(0); return std::make_tuple(out, arg_out); } auto B = 1; for (auto i = 0; i < dim; i++) B *= src.size(i); auto E = src.size(dim); auto K = src.numel() / (B * E); auto N = out.size(dim); auto index_info = at::cuda::detail::getTensorInfo(index); auto stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, src.scalar_type(), "_", [&] { auto src_data = src.data_ptr(); auto out_data = out.data_ptr(); AT_DISPATCH_REDUCTION_TYPES(reduce, [&] { if (!optional_out.has_value()) out.fill_(Reducer::init()); scatter_kernel <<>>( src_data, index_info, out_data, E, K, N, src.numel()); if (!optional_out.has_value() && (REDUCE == MIN || REDUCE == MAX)) out.masked_fill_(out == Reducer::init(), (scalar_t)0); if (REDUCE == MIN || REDUCE == MAX) scatter_arg_kernel <<>>( src_data, index_info, out_data, arg_out_data, E, K, N, src.numel()); }); }); return std::make_tuple(out, arg_out); } pytorch_scatter-2.1.2/csrc/cuda/scatter_cuda.h000066400000000000000000000004451450760104200214130ustar00rootroot00000000000000#pragma once #include "../extensions.h" std::tuple> scatter_cuda(torch::Tensor src, torch::Tensor index, int64_t dim, torch::optional optional_out, torch::optional dim_size, std::string reduce); pytorch_scatter-2.1.2/csrc/cuda/segment_coo_cuda.cu000066400000000000000000000315261450760104200224340ustar00rootroot00000000000000#include "segment_coo_cuda.h" #include #include #include #include "reducer.cuh" #include "utils.cuh" #define THREADS 256 #define BLOCKS(TB, N) (TB * N + THREADS - 1) / THREADS #define FULL_MASK 0xffffffff template __global__ void segment_coo_kernel(const scalar_t *src_data, const at::cuda::detail::TensorInfo index_info, scalar_t *out_data, size_t E, size_t N) { // Each thread processes exactly one entry. Within a warp, we perform a // parallel reduction across equal indices, and write the intermediate // result via atomics. int row_idx = blockIdx.x * blockDim.x + threadIdx.x; int lane_idx = row_idx & (32 - 1); int D = index_info.sizes[index_info.dims - 1]; if (row_idx < E) { int offset = at::cuda::detail::IndexToOffset::get( row_idx, index_info); int64_t idx = index_info.data[offset], next_idx; int out_idx = (row_idx / D) * N + idx; scalar_t val = HAS_VAL ? src_data[row_idx] : (scalar_t)1, tmp; #pragma unroll for (int i = 1; i < 32; i *= 2) { // Parallel reduction inside a single warp. tmp = SHFL_UP_SYNC(FULL_MASK, val, i); next_idx = SHFL_UP_SYNC(FULL_MASK, idx, i); if (lane_idx >= i && row_idx / D == (row_idx - i) / D) { assert(idx >= next_idx); if (idx == next_idx) Reducer::update(&val, tmp); } } next_idx = SHFL_DOWN_SYNC(FULL_MASK, idx, 1); if (lane_idx == 32 - 1 || row_idx / D != (row_idx + 1) / D || idx != next_idx) Reducer::atomic_write(out_data + out_idx, val); } } template __global__ void segment_coo_arg_kernel( const scalar_t *src_data, const at::cuda::detail::TensorInfo index_info, scalar_t *out_data, int64_t *arg_out_data, size_t E, size_t N) { int row_idx = blockIdx.x * blockDim.x + threadIdx.x; int D = index_info.sizes[index_info.dims - 1]; if (row_idx < E) { int offset = at::cuda::detail::IndexToOffset::get( row_idx, index_info); int64_t idx = index_info.data[offset]; int out_idx = (row_idx / D) * N + idx; scalar_t val = __ldg(out_data + out_idx); if (src_data[row_idx] == val) arg_out_data[out_idx] = row_idx % D; } } template __global__ void segment_coo_broadcast_kernel( const scalar_t *src_data, const at::cuda::detail::TensorInfo index_info, scalar_t *out_data, size_t E, size_t K, size_t N) { // Each thread processes a single column and `TB` index entries. Coalesced // read and write is performed in column-major order. The intermediate // results are written via atomics. int D = index_info.sizes[index_info.dims - 1]; int E_1 = E / D; int E_2 = (D - 1) + TB - ((D - 1) % TB); int row_idx = blockIdx.x * blockDim.y + threadIdx.y; int col_idx = blockIdx.y * blockDim.x + threadIdx.x; int dim_start = (row_idx * TB) / E_2; int row_start = (row_idx * TB) % E_2; if (dim_start < E_1 && col_idx < K) { int offset = at::cuda::detail::IndexToOffset::get( dim_start * D + row_start, index_info); int idx1 = __ldg(index_info.data + offset), idx2; scalar_t val = src_data[K * (dim_start * D + row_start) + col_idx]; #pragma unroll for (int i = 1; i < TB; i++) { if (row_start + i >= D) break; idx2 = __ldg(index_info.data + offset + i * index_info.strides[index_info.dims - 1]); assert(idx1 <= idx2); if (idx1 == idx2) { Reducer::update( &val, src_data[K * (dim_start * D + row_start + i) + col_idx]); } else { Reducer::atomic_write( out_data + (dim_start * N + idx1) * K + col_idx, val); val = src_data[K * (dim_start * D + row_start + i) + col_idx]; } idx1 = idx2; } Reducer::atomic_write( out_data + (dim_start * N + idx1) * K + col_idx, val); } } template __global__ void segment_coo_arg_broadcast_kernel( const scalar_t *src_data, const at::cuda::detail::TensorInfo index_info, scalar_t *out_data, int64_t *arg_out_data, size_t E, size_t K, size_t N) { int thread_idx = blockIdx.x * blockDim.x + threadIdx.x; int row_idx = thread_idx / K; int col_idx = thread_idx % K; int D = index_info.sizes[index_info.dims - 1]; if (row_idx < E && col_idx < K) { int offset = at::cuda::detail::IndexToOffset::get( row_idx, index_info); int idx = __ldg(index_info.data + offset); int out_idx = ((row_idx / D) * N + idx) * K + col_idx; scalar_t val = __ldg(out_data + out_idx); if (src_data[thread_idx] == val) arg_out_data[out_idx] = row_idx % D; } } std::tuple> segment_coo_cuda(torch::Tensor src, torch::Tensor index, torch::optional optional_out, torch::optional dim_size, std::string reduce) { CHECK_CUDA(src); CHECK_CUDA(index); if (optional_out.has_value()) CHECK_CUDA(optional_out.value()); cudaSetDevice(src.get_device()); CHECK_INPUT(src.dim() >= index.dim()); auto sizes = index.sizes().vec(); for (int i = 0; i < index.dim(); i++) { sizes[i] = src.size(i); } index = index.expand(sizes); auto dim = index.dim() - 1; src = src.contiguous(); torch::Tensor out; if (optional_out.has_value()) { out = optional_out.value().contiguous(); for (int i = 0; i < out.dim(); i++) if (i != dim) CHECK_INPUT(src.size(i) == out.size(i)); } else { sizes = src.sizes().vec(); if (dim_size.has_value()) sizes[dim] = dim_size.value(); else if (index.numel() == 0) sizes[dim] = 0; else { auto tmp = index.select(dim, index.size(dim) - 1); tmp = tmp.numel() > 1 ? tmp.max() : tmp; sizes[dim] = 1 + tmp.cpu().data_ptr()[0]; } out = torch::zeros(sizes, src.options()); } torch::optional arg_out = torch::nullopt; int64_t *arg_out_data = nullptr; if (reduce2REDUCE.at(reduce) == MIN || reduce2REDUCE.at(reduce) == MAX) { arg_out = torch::full_like(out, src.size(dim), index.options()); arg_out_data = arg_out.value().data_ptr(); } else if (reduce2REDUCE.at(reduce) == MEAN) { auto sizes = index.sizes().vec(); sizes[dim] = out.size(dim); arg_out = torch::zeros(sizes, out.options()); } if (index.numel() == 0) return std::make_tuple(out, arg_out); auto E = index.numel(); auto E_2 = index.size(dim); auto E_1 = index.numel() / E_2; auto K = src.numel() / E; auto N = out.size(dim); auto avg_len = (float)E_2 / (float)N; auto index_info = at::cuda::detail::getTensorInfo(index); auto stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, src.scalar_type(), "_", [&] { auto src_data = src.data_ptr(); auto out_data = out.data_ptr(); AT_DISPATCH_REDUCTION_TYPES(reduce, [&] { if (!optional_out.has_value()) out.fill_(Reducer::init()); if (K == 1) segment_coo_kernel <<>>(src_data, index_info, out_data, E, N); else if (avg_len <= 8) segment_coo_broadcast_kernel <<>>(src_data, index_info, out_data, E, K, N); else if (avg_len <= 16) segment_coo_broadcast_kernel <<>>(src_data, index_info, out_data, E, K, N); else if (avg_len <= 32) segment_coo_broadcast_kernel <<>>(src_data, index_info, out_data, E, K, N); else segment_coo_broadcast_kernel <<>>(src_data, index_info, out_data, E, K, N); if (!optional_out.has_value() && (REDUCE == MIN || REDUCE == MAX)) out.masked_fill_(out == Reducer::init(), (scalar_t)0); if (REDUCE == MIN || REDUCE == MAX) { if (K == 1) segment_coo_arg_kernel <<>>( src_data, index_info, out_data, arg_out_data, E, N); else segment_coo_arg_broadcast_kernel <<>>( src_data, index_info, out_data, arg_out_data, E, K, N); } if (REDUCE == MEAN) { auto count_data = arg_out.value().data_ptr(); segment_coo_kernel <<>>(nullptr, index_info, count_data, E, N); arg_out.value().masked_fill_(arg_out.value() < (scalar_t)1, (scalar_t)1); auto count = arg_out.value(); for (int i = dim + 1; i < out.dim(); i++) count = count.unsqueeze(-1); if (out.is_floating_point()) out.true_divide_(count); else out.div_(count, "floor"); } }); }); return std::make_tuple(out, arg_out); } template __global__ void gather_coo_kernel(const scalar_t *src_data, const at::cuda::detail::TensorInfo index_info, scalar_t *out_data, size_t E, size_t N) { int row_idx = blockIdx.x * blockDim.x + threadIdx.x; if (row_idx < E) { int offset = at::cuda::detail::IndexToOffset::get( row_idx, index_info); int row = index_info.data[offset]; offset = (row_idx / index_info.sizes[index_info.dims - 1]) * N; scalar_t val = __ldg(src_data + offset + row); out_data[row_idx] = val; } } template __global__ void gather_coo_broadcast_kernel( const scalar_t *src_data, const at::cuda::detail::TensorInfo index_info, scalar_t *out_data, size_t E, size_t K, size_t N) { int thread_idx = blockIdx.x * blockDim.x + threadIdx.x; int row_idx = thread_idx / K; int col_idx = thread_idx % K; if (thread_idx < E * K) { int offset = at::cuda::detail::IndexToOffset::get( row_idx, index_info); int row = index_info.data[offset]; offset = (row_idx / index_info.sizes[index_info.dims - 1]) * N * K; scalar_t val = __ldg(src_data + offset + K * row + col_idx); out_data[thread_idx] = val; } } torch::Tensor gather_coo_cuda(torch::Tensor src, torch::Tensor index, torch::optional optional_out) { CHECK_CUDA(src); CHECK_CUDA(index); if (optional_out.has_value()) CHECK_CUDA(optional_out.value()); cudaSetDevice(src.get_device()); CHECK_INPUT(src.dim() >= index.dim()); auto sizes = index.sizes().vec(); for (auto i = 0; i < index.dim() - 1; i++) sizes[i] = src.size(i); index = index.expand(sizes); auto dim = index.dim() - 1; src = src.contiguous(); torch::Tensor out; if (optional_out.has_value()) { out = optional_out.value().contiguous(); for (auto i = 0; i < src.dim(); i++) if (i != dim) CHECK_INPUT(src.size(i) == out.size(i)); CHECK_INPUT(index.size(dim) == out.size(dim)); } else { auto sizes = src.sizes().vec(); sizes[dim] = index.size(dim); out = torch::empty(sizes, src.options()); } if (index.numel() == 0) return out; auto E = index.numel(); auto K = out.numel() / E; auto N = src.size(dim); auto index_info = at::cuda::detail::getTensorInfo(index); auto stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, src.scalar_type(), "_", [&] { auto src_data = src.data_ptr(); auto out_data = out.data_ptr(); if (K == 1) gather_coo_kernel<<>>( src_data, index_info, out_data, E, N); else gather_coo_broadcast_kernel <<>>(src_data, index_info, out_data, E, K, N); }); return out; } pytorch_scatter-2.1.2/csrc/cuda/segment_coo_cuda.h000066400000000000000000000006671450760104200222560ustar00rootroot00000000000000#pragma once #include "../extensions.h" std::tuple> segment_coo_cuda(torch::Tensor src, torch::Tensor index, torch::optional optional_out, torch::optional dim_size, std::string reduce); torch::Tensor gather_coo_cuda(torch::Tensor src, torch::Tensor index, torch::optional optional_out); pytorch_scatter-2.1.2/csrc/cuda/segment_csr_cuda.cu000066400000000000000000000227611450760104200224440ustar00rootroot00000000000000#include "segment_csr_cuda.h" #include #include #include #include "index_info.cuh" #include "reducer.cuh" #include "utils.cuh" #define THREADS 256 #define BLOCKS(TB, N) (TB * N + THREADS - 1) / THREADS #define FULL_MASK 0xffffffff template __global__ void segment_csr_kernel(const scalar_t *src_data, const at::cuda::detail::TensorInfo indptr_info, scalar_t *out_data, int64_t *arg_out_data, size_t N, size_t E) { // Each warp processes exactly `32/TB` rows and aggregates all row values // via a parallel reduction. int thread_idx = blockIdx.x * blockDim.x + threadIdx.x; int row_idx = thread_idx / TB; int lane_idx = thread_idx & (TB - 1); if (row_idx < N) { int offset = IndexPtrToOffset::get(row_idx, indptr_info); int64_t row_start = __ldg(indptr_info.data + offset); int64_t row_end = __ldg(indptr_info.data + offset + indptr_info.strides[indptr_info.dims - 1]); scalar_t val = Reducer::init(); int64_t arg, arg_tmp; offset = (row_idx / (indptr_info.sizes[indptr_info.dims - 1] - 1)) * E; for (int64_t src_idx = row_start + lane_idx; src_idx < row_end; src_idx += TB) { Reducer::update(&val, src_data[offset + src_idx], &arg, src_idx); } #pragma unroll for (int i = TB / 2; i > 0; i /= 2) { // Parallel reduction inside a single warp. if (REDUCE == MIN || REDUCE == MAX) arg_tmp = SHFL_DOWN_SYNC(FULL_MASK, arg, i); Reducer::update( &val, SHFL_DOWN_SYNC(FULL_MASK, val, i), &arg, arg_tmp); } if (lane_idx == 0) { Reducer::write(out_data + row_idx, val, arg_out_data + row_idx, arg, row_end - row_start); } } } template __global__ void segment_csr_broadcast_kernel( const scalar_t *src_data, const at::cuda::detail::TensorInfo indptr_info, scalar_t *out_data, int64_t *arg_out_data, size_t N, size_t K, size_t E) { // Each thread processes exactly one row. It turned out that is more // efficient than using shared memory due to avoiding synchronization // barriers. int thread_idx = blockIdx.x * blockDim.x + threadIdx.x; int row_idx = thread_idx / K; int lane_idx = thread_idx % K; if (thread_idx < N * K) { int offset = IndexPtrToOffset::get(row_idx, indptr_info); int64_t row_start = __ldg(indptr_info.data + offset); int64_t row_end = __ldg(indptr_info.data + offset + indptr_info.strides[indptr_info.dims - 1]); scalar_t val = Reducer::init(); int64_t arg; offset = (row_idx / (indptr_info.sizes[indptr_info.dims - 1] - 1)) * E * K; for (int64_t src_idx = row_start; src_idx < row_end; src_idx++) { Reducer::update( &val, src_data[offset + K * src_idx + lane_idx], &arg, src_idx); } Reducer::write(out_data + thread_idx, val, arg_out_data + thread_idx, arg, row_end - row_start); } } std::tuple> segment_csr_cuda(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out, std::string reduce) { CHECK_CUDA(src); CHECK_CUDA(indptr); if (optional_out.has_value()) CHECK_CUDA(optional_out.value()); cudaSetDevice(src.get_device()); CHECK_INPUT(src.dim() >= indptr.dim()); auto sizes = indptr.sizes().vec(); for (auto i = 0; i < indptr.dim() - 1; i++) sizes[i] = src.size(i); indptr = indptr.expand(sizes); auto dim = indptr.dim() - 1; src = src.contiguous(); torch::Tensor out; if (optional_out.has_value()) { out = optional_out.value().contiguous(); for (int i = 0; i < out.dim(); i++) if (i != dim) CHECK_INPUT(src.size(i) == out.size(i)); CHECK_INPUT(src.numel() == 0 || out.size(dim) == indptr.size(dim) - 1); } else { sizes = src.sizes().vec(); sizes[dim] = std::max(indptr.size(dim) - 1, 0); out = torch::empty(sizes, src.options()); } torch::optional arg_out = torch::nullopt; int64_t *arg_out_data = nullptr; if (reduce2REDUCE.at(reduce) == MIN || reduce2REDUCE.at(reduce) == MAX) { arg_out = torch::full(out.sizes(), src.size(dim), indptr.options()); arg_out_data = arg_out.value().data_ptr(); } if (src.numel() == 0) { if (!optional_out.has_value()) out.fill_(0); return std::make_tuple(out, arg_out); } auto N = out.size(dim) * (indptr.numel() / indptr.size(-1)); auto K = out.numel() / N; auto E = src.size(dim); auto indptr_info = at::cuda::detail::getTensorInfo(indptr); auto stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, src.scalar_type(), "_", [&] { auto src_data = src.data_ptr(); auto out_data = out.data_ptr(); AT_DISPATCH_REDUCTION_TYPES(reduce, [&] { if (K == 1) { segment_csr_kernel <<>>( src_data, indptr_info, out_data, arg_out_data, N, E); } else { segment_csr_broadcast_kernel <<>>( src_data, indptr_info, out_data, arg_out_data, N, K, E); } }); }); return std::make_tuple(out, arg_out); } template __global__ void gather_csr_kernel(const scalar_t *src_data, const at::cuda::detail::TensorInfo indptr_info, scalar_t *out_data, size_t N, size_t E) { int thread_idx = blockIdx.x * blockDim.x + threadIdx.x; int row_idx = thread_idx / TB; int lane_idx = thread_idx % TB; if (row_idx < N) { int offset = IndexPtrToOffset::get(row_idx, indptr_info); int row_start = __ldg(indptr_info.data + offset); int row_end = __ldg(indptr_info.data + offset + indptr_info.strides[indptr_info.dims - 1]); scalar_t val = __ldg(src_data + row_idx); offset = (row_idx / (indptr_info.sizes[indptr_info.dims - 1] - 1)) * E; for (int out_idx = row_start + lane_idx; out_idx < row_end; out_idx += TB) { out_data[offset + out_idx] = val; // "Mostly" coalesced. } } } template __global__ void gather_csr_broadcast_kernel( const scalar_t *src_data, const at::cuda::detail::TensorInfo indptr_info, scalar_t *out_data, size_t N, size_t K, size_t E) { int thread_idx = blockIdx.x * blockDim.x + threadIdx.x; int row_idx = thread_idx / K; int lane_idx = thread_idx % K; if (thread_idx < N * K) { int offset = IndexPtrToOffset::get(row_idx, indptr_info); int row_start = __ldg(indptr_info.data + offset); int row_end = __ldg(indptr_info.data + offset + indptr_info.strides[indptr_info.dims - 1]); scalar_t val = src_data[thread_idx]; // Coalesced. offset = (row_idx / (indptr_info.sizes[indptr_info.dims - 1] - 1)) * E * K; for (int out_idx = row_start; out_idx < row_end; out_idx++) { out_data[offset + K * out_idx + lane_idx] = val; // "Mostly" coalesced. } } } torch::Tensor gather_csr_cuda(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out) { CHECK_CUDA(src); CHECK_CUDA(indptr); if (optional_out.has_value()) CHECK_CUDA(optional_out.value()); cudaSetDevice(src.get_device()); CHECK_INPUT(src.dim() >= indptr.dim()); auto sizes = indptr.sizes().vec(); for (auto i = 0; i < indptr.dim() - 1; i++) sizes[i] = src.size(i); indptr = indptr.expand(sizes); auto dim = indptr.dim() - 1; CHECK_INPUT(src.size(dim) == 0 || src.size(dim) == indptr.size(dim) - 1); src = src.contiguous(); torch::Tensor out; if (optional_out.has_value()) { out = optional_out.value().contiguous(); for (auto i = 0; i < out.dim(); i++) if (i != dim) CHECK_INPUT(src.size(i) == out.size(i)); } else { auto sizes = src.sizes().vec(); if (src.numel() > 0) { sizes[dim] = indptr.flatten()[-1].cpu().data_ptr()[0]; } else { sizes[dim] = 0; } out = torch::empty(sizes, src.options()); } if (src.numel() == 0) { if (!optional_out.has_value()) out.fill_(0); return out; } auto N = src.size(dim) * (indptr.numel() / indptr.size(-1)); auto K = src.numel() / N; auto E = out.size(dim); auto indptr_info = at::cuda::detail::getTensorInfo(indptr); auto stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, src.scalar_type(), "_", [&] { auto src_data = src.data_ptr(); auto out_data = out.data_ptr(); if (K == 1) gather_csr_kernel<<>>( src_data, indptr_info, out_data, N, E); else gather_csr_broadcast_kernel <<>>(src_data, indptr_info, out_data, N, K, E); }); return out; } pytorch_scatter-2.1.2/csrc/cuda/segment_csr_cuda.h000066400000000000000000000006261450760104200222600ustar00rootroot00000000000000#pragma once #include "../extensions.h" std::tuple> segment_csr_cuda(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out, std::string reduce); torch::Tensor gather_csr_cuda(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out); pytorch_scatter-2.1.2/csrc/cuda/utils.cuh000066400000000000000000000021621450760104200204400ustar00rootroot00000000000000#pragma once #include "../extensions.h" #define CHECK_CUDA(x) \ AT_ASSERTM(x.device().is_cuda(), #x " must be CUDA tensor") #define CHECK_INPUT(x) AT_ASSERTM(x, "Input mismatch") __device__ __inline__ at::Half __shfl_up_sync(const unsigned mask, const at::Half var, const unsigned int delta) { return __shfl_up_sync(mask, var.operator __half(), delta); } __device__ __inline__ at::Half __shfl_down_sync(const unsigned mask, const at::Half var, const unsigned int delta) { return __shfl_down_sync(mask, var.operator __half(), delta); } #ifdef USE_ROCM __device__ __inline__ at::Half __ldg(const at::Half* ptr) { return __ldg(reinterpret_cast(ptr)); } #define SHFL_UP_SYNC(mask, var, delta) __shfl_up(var, delta) #define SHFL_DOWN_SYNC(mask, var, delta) __shfl_down(var, delta) #else #define SHFL_UP_SYNC __shfl_up_sync #define SHFL_DOWN_SYNC __shfl_down_sync #endif pytorch_scatter-2.1.2/csrc/extensions.h000066400000000000000000000000551450760104200202320ustar00rootroot00000000000000#include "macros.h" #include pytorch_scatter-2.1.2/csrc/macros.h000066400000000000000000000006751450760104200173270ustar00rootroot00000000000000#pragma once #ifdef _WIN32 #if defined(torchscatter_EXPORTS) #define SCATTER_API __declspec(dllexport) #else #define SCATTER_API __declspec(dllimport) #endif #else #define SCATTER_API #endif #if (defined __cpp_inline_variables) || __cplusplus >= 201703L #define SCATTER_INLINE_VARIABLE inline #else #ifdef _MSC_VER #define SCATTER_INLINE_VARIABLE __declspec(selectany) #else #define SCATTER_INLINE_VARIABLE __attribute__((weak)) #endif #endif pytorch_scatter-2.1.2/csrc/scatter.cpp000066400000000000000000000245511450760104200200420ustar00rootroot00000000000000#ifdef WITH_PYTHON #include #endif #include #include "cpu/scatter_cpu.h" #include "macros.h" #include "utils.h" #ifdef WITH_CUDA #include "cuda/scatter_cuda.h" #endif #ifdef _WIN32 #ifdef WITH_PYTHON #ifdef WITH_CUDA PyMODINIT_FUNC PyInit__scatter_cuda(void) { return NULL; } #else PyMODINIT_FUNC PyInit__scatter_cpu(void) { return NULL; } #endif #endif #endif torch::Tensor broadcast(torch::Tensor src, torch::Tensor other, int64_t dim) { if (src.dim() == 1) for (auto i = 0; i < dim; i++) src = src.unsqueeze(0); for (auto i = src.dim(); i < other.dim(); i++) src = src.unsqueeze(-1); src = src.expand(other.sizes().vec()); return src; } std::tuple> scatter_fw(torch::Tensor src, torch::Tensor index, int64_t dim, torch::optional optional_out, torch::optional dim_size, std::string reduce) { if (src.device().is_cuda()) { #ifdef WITH_CUDA return scatter_cuda(src, index, dim, optional_out, dim_size, reduce); #else AT_ERROR("Not compiled with CUDA support"); #endif } else { return scatter_cpu(src, index, dim, optional_out, dim_size, reduce); } } using torch::autograd::AutogradContext; using torch::autograd::Variable; using torch::autograd::variable_list; class ScatterSum : public torch::autograd::Function { public: static variable_list forward(AutogradContext *ctx, Variable src, Variable index, int64_t dim, torch::optional optional_out, torch::optional dim_size) { dim = dim < 0 ? src.dim() + dim : dim; ctx->saved_data["dim"] = dim; ctx->saved_data["src_shape"] = src.sizes(); index = broadcast(index, src, dim); auto result = scatter_fw(src, index, dim, optional_out, dim_size, "sum"); auto out = std::get<0>(result); ctx->save_for_backward({index}); if (optional_out.has_value()) ctx->mark_dirty({optional_out.value()}); return {out}; } static variable_list backward(AutogradContext *ctx, variable_list grad_outs) { auto grad_out = grad_outs[0]; auto saved = ctx->get_saved_variables(); auto index = saved[0]; auto dim = ctx->saved_data["dim"].toInt(); auto src_shape = list2vec(ctx->saved_data["src_shape"].toIntList()); auto grad_in = torch::gather(grad_out, dim, index, false); return {grad_in, Variable(), Variable(), Variable(), Variable()}; } }; class ScatterMul : public torch::autograd::Function { public: static variable_list forward(AutogradContext *ctx, Variable src, Variable index, int64_t dim, torch::optional optional_out, torch::optional dim_size) { dim = dim < 0 ? src.dim() + dim : dim; ctx->saved_data["dim"] = dim; ctx->saved_data["src_shape"] = src.sizes(); index = broadcast(index, src, dim); auto result = scatter_fw(src, index, dim, optional_out, dim_size, "mul"); auto out = std::get<0>(result); ctx->save_for_backward({src, index, out}); if (optional_out.has_value()) ctx->mark_dirty({optional_out.value()}); return {out}; } static variable_list backward(AutogradContext *ctx, variable_list grad_outs) { auto grad_out = grad_outs[0]; auto saved = ctx->get_saved_variables(); auto src = saved[0]; auto index = saved[1]; auto out = saved[2]; auto dim = ctx->saved_data["dim"].toInt(); auto src_shape = list2vec(ctx->saved_data["src_shape"].toIntList()); auto grad_in = torch::gather(grad_out * out, dim, index, false).div_(src); grad_in.masked_fill_(grad_in.isnan(), 0); return {grad_in, Variable(), Variable(), Variable(), Variable()}; } }; class ScatterMean : public torch::autograd::Function { public: static variable_list forward(AutogradContext *ctx, Variable src, Variable index, int64_t dim, torch::optional optional_out, torch::optional dim_size) { dim = dim < 0 ? src.dim() + dim : dim; ctx->saved_data["dim"] = dim; ctx->saved_data["src_shape"] = src.sizes(); auto old_index = index; index = broadcast(index, src, dim); auto result = scatter_fw(src, index, dim, optional_out, dim_size, "sum"); auto out = std::get<0>(result); auto ones = torch::ones(old_index.sizes(), src.options()); result = scatter_fw(ones, old_index, old_index.dim() <= dim ? old_index.dim() - 1 : dim, torch::nullopt, out.size(dim), "sum"); auto count = std::get<0>(result); count.masked_fill_(count < 1, 1); count = broadcast(count, out, dim); if (out.is_floating_point()) out.true_divide_(count); else out.div_(count, "floor"); ctx->save_for_backward({index, count}); if (optional_out.has_value()) ctx->mark_dirty({optional_out.value()}); return {out}; } static variable_list backward(AutogradContext *ctx, variable_list grad_outs) { auto grad_out = grad_outs[0]; auto saved = ctx->get_saved_variables(); auto index = saved[0]; auto count = saved[1]; auto dim = ctx->saved_data["dim"].toInt(); auto src_shape = list2vec(ctx->saved_data["src_shape"].toIntList()); count = torch::gather(count, dim, index, false); auto grad_in = torch::gather(grad_out, dim, index, false); grad_in.true_divide_(count); return {grad_in, Variable(), Variable(), Variable(), Variable()}; } }; class ScatterMin : public torch::autograd::Function { public: static variable_list forward(AutogradContext *ctx, Variable src, Variable index, int64_t dim, torch::optional optional_out, torch::optional dim_size) { dim = dim < 0 ? src.dim() + dim : dim; ctx->saved_data["dim"] = dim; ctx->saved_data["src_shape"] = src.sizes(); index = broadcast(index, src, dim); auto result = scatter_fw(src, index, dim, optional_out, dim_size, "min"); auto out = std::get<0>(result); auto arg_out = std::get<1>(result).value(); ctx->save_for_backward({index, arg_out}); ctx->mark_non_differentiable({arg_out}); if (optional_out.has_value()) ctx->mark_dirty({optional_out.value()}); return {out, arg_out}; } static variable_list backward(AutogradContext *ctx, variable_list grad_outs) { auto grad_out = grad_outs[0]; auto saved = ctx->get_saved_variables(); auto index = saved[0]; auto arg_out = saved[1]; auto dim = ctx->saved_data["dim"].toInt(); auto src_shape = list2vec(ctx->saved_data["src_shape"].toIntList()); src_shape[dim] += 1; auto grad_in = torch::zeros(src_shape, grad_out.options()); grad_in.scatter_(dim, arg_out, grad_out); grad_in = grad_in.narrow(dim, 0, src_shape[dim] - 1); return {grad_in, Variable(), Variable(), Variable(), Variable()}; } }; class ScatterMax : public torch::autograd::Function { public: static variable_list forward(AutogradContext *ctx, Variable src, Variable index, int64_t dim, torch::optional optional_out, torch::optional dim_size) { dim = dim < 0 ? src.dim() + dim : dim; ctx->saved_data["dim"] = dim; ctx->saved_data["src_shape"] = src.sizes(); index = broadcast(index, src, dim); auto result = scatter_fw(src, index, dim, optional_out, dim_size, "max"); auto out = std::get<0>(result); auto arg_out = std::get<1>(result).value(); ctx->save_for_backward({index, arg_out}); ctx->mark_non_differentiable({arg_out}); if (optional_out.has_value()) ctx->mark_dirty({optional_out.value()}); return {out, arg_out}; } static variable_list backward(AutogradContext *ctx, variable_list grad_outs) { auto grad_out = grad_outs[0]; auto saved = ctx->get_saved_variables(); auto index = saved[0]; auto arg_out = saved[1]; auto dim = ctx->saved_data["dim"].toInt(); auto src_shape = list2vec(ctx->saved_data["src_shape"].toIntList()); src_shape[dim] += 1; auto grad_in = torch::zeros(src_shape, grad_out.options()); grad_in.scatter_(dim, arg_out, grad_out); grad_in = grad_in.narrow(dim, 0, src_shape[dim] - 1); return {grad_in, Variable(), Variable(), Variable(), Variable()}; } }; SCATTER_API torch::Tensor scatter_sum(torch::Tensor src, torch::Tensor index, int64_t dim, torch::optional optional_out, torch::optional dim_size) { return ScatterSum::apply(src, index, dim, optional_out, dim_size)[0]; } SCATTER_API torch::Tensor scatter_mul(torch::Tensor src, torch::Tensor index, int64_t dim, torch::optional optional_out, torch::optional dim_size) { return ScatterMul::apply(src, index, dim, optional_out, dim_size)[0]; } SCATTER_API torch::Tensor scatter_mean(torch::Tensor src, torch::Tensor index, int64_t dim, torch::optional optional_out, torch::optional dim_size) { return ScatterMean::apply(src, index, dim, optional_out, dim_size)[0]; } SCATTER_API std::tuple scatter_min(torch::Tensor src, torch::Tensor index, int64_t dim, torch::optional optional_out, torch::optional dim_size) { auto result = ScatterMin::apply(src, index, dim, optional_out, dim_size); return std::make_tuple(result[0], result[1]); } SCATTER_API std::tuple scatter_max(torch::Tensor src, torch::Tensor index, int64_t dim, torch::optional optional_out, torch::optional dim_size) { auto result = ScatterMax::apply(src, index, dim, optional_out, dim_size); return std::make_tuple(result[0], result[1]); } static auto registry = torch::RegisterOperators() .op("torch_scatter::scatter_sum", &scatter_sum) .op("torch_scatter::scatter_mul", &scatter_mul) .op("torch_scatter::scatter_mean", &scatter_mean) .op("torch_scatter::scatter_min", &scatter_min) .op("torch_scatter::scatter_max", &scatter_max); pytorch_scatter-2.1.2/csrc/scatter.h000066400000000000000000000057231450760104200175070ustar00rootroot00000000000000#pragma once #include "extensions.h" namespace scatter { SCATTER_API int64_t cuda_version() noexcept; namespace detail { SCATTER_INLINE_VARIABLE int64_t _cuda_version = cuda_version(); } // namespace detail } // namespace scatter SCATTER_API torch::Tensor scatter_sum(torch::Tensor src, torch::Tensor index, int64_t dim, torch::optional optional_out, torch::optional dim_size); SCATTER_API torch::Tensor scatter_mul(torch::Tensor src, torch::Tensor index, int64_t dim, torch::optional optional_out, torch::optional dim_size); SCATTER_API torch::Tensor scatter_mean(torch::Tensor src, torch::Tensor index, int64_t dim, torch::optional optional_out, torch::optional dim_size); SCATTER_API std::tuple scatter_min(torch::Tensor src, torch::Tensor index, int64_t dim, torch::optional optional_out, torch::optional dim_size); SCATTER_API std::tuple scatter_max(torch::Tensor src, torch::Tensor index, int64_t dim, torch::optional optional_out, torch::optional dim_size); SCATTER_API torch::Tensor segment_sum_coo(torch::Tensor src, torch::Tensor index, torch::optional optional_out, torch::optional dim_size); SCATTER_API torch::Tensor segment_mean_coo(torch::Tensor src, torch::Tensor index, torch::optional optional_out, torch::optional dim_size); SCATTER_API std::tuple segment_min_coo(torch::Tensor src, torch::Tensor index, torch::optional optional_out, torch::optional dim_size); SCATTER_API std::tuple segment_max_coo(torch::Tensor src, torch::Tensor index, torch::optional optional_out, torch::optional dim_size); SCATTER_API torch::Tensor gather_coo(torch::Tensor src, torch::Tensor index, torch::optional optional_out); SCATTER_API torch::Tensor segment_sum_csr(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out); SCATTER_API torch::Tensor segment_mean_csr(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out); SCATTER_API std::tuple segment_min_csr(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out); SCATTER_API std::tuple segment_max_csr(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out); SCATTER_API torch::Tensor gather_csr(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out); pytorch_scatter-2.1.2/csrc/segment_coo.cpp000066400000000000000000000220751450760104200206760ustar00rootroot00000000000000#ifdef WITH_PYTHON #include #endif #include #include "cpu/segment_coo_cpu.h" #include "macros.h" #include "utils.h" #ifdef WITH_CUDA #include "cuda/segment_coo_cuda.h" #endif #ifdef _WIN32 #ifdef WITH_PYTHON #ifdef WITH_CUDA PyMODINIT_FUNC PyInit__segment_coo_cuda(void) { return NULL; } #else PyMODINIT_FUNC PyInit__segment_coo_cpu(void) { return NULL; } #endif #endif #endif std::tuple> segment_coo_fw(torch::Tensor src, torch::Tensor index, torch::optional optional_out, torch::optional dim_size, std::string reduce) { if (src.device().is_cuda()) { #ifdef WITH_CUDA return segment_coo_cuda(src, index, optional_out, dim_size, reduce); #else AT_ERROR("Not compiled with CUDA support"); #endif } else { return segment_coo_cpu(src, index, optional_out, dim_size, reduce); } } torch::Tensor gather_coo_fw(torch::Tensor src, torch::Tensor index, torch::optional optional_out) { if (src.device().is_cuda()) { #ifdef WITH_CUDA return gather_coo_cuda(src, index, optional_out); #else AT_ERROR("Not compiled with CUDA support"); #endif } else { return gather_coo_cpu(src, index, optional_out); } } using torch::autograd::AutogradContext; using torch::autograd::Variable; using torch::autograd::variable_list; class SegmentSumCOO : public torch::autograd::Function { public: static variable_list forward(AutogradContext *ctx, Variable src, Variable index, torch::optional optional_out, torch::optional dim_size) { ctx->saved_data["src_shape"] = src.sizes(); auto result = segment_coo_fw(src, index, optional_out, dim_size, "sum"); auto out = std::get<0>(result); ctx->save_for_backward({index}); if (optional_out.has_value()) ctx->mark_dirty({optional_out.value()}); return {out}; } static variable_list backward(AutogradContext *ctx, variable_list grad_outs) { auto grad_out = grad_outs[0]; auto saved = ctx->get_saved_variables(); auto index = saved[0]; auto src_shape = list2vec(ctx->saved_data["src_shape"].toIntList()); auto grad_in = torch::empty(src_shape, grad_out.options()); gather_coo_fw(grad_out, index, grad_in); return {grad_in, Variable(), Variable(), Variable()}; } }; class SegmentMeanCOO : public torch::autograd::Function { public: static variable_list forward(AutogradContext *ctx, Variable src, Variable index, torch::optional optional_out, torch::optional dim_size) { ctx->saved_data["src_shape"] = src.sizes(); auto result = segment_coo_fw(src, index, optional_out, dim_size, "mean"); auto out = std::get<0>(result); auto count = std::get<1>(result).value(); ctx->save_for_backward({index, count}); if (optional_out.has_value()) ctx->mark_dirty({optional_out.value()}); return {out}; } static variable_list backward(AutogradContext *ctx, variable_list grad_outs) { auto grad_out = grad_outs[0]; auto saved = ctx->get_saved_variables(); auto index = saved[0]; auto count = saved[1]; auto src_shape = list2vec(ctx->saved_data["src_shape"].toIntList()); auto grad_in = torch::empty(src_shape, grad_out.options()); gather_coo_fw(grad_out, index, grad_in); count = gather_coo_fw(count, index, torch::nullopt); for (auto i = 0; i < grad_out.dim() - index.dim(); i++) count = count.unsqueeze(-1); grad_in.true_divide_(count); return {grad_in, Variable(), Variable(), Variable()}; } }; class SegmentMinCOO : public torch::autograd::Function { public: static variable_list forward(AutogradContext *ctx, Variable src, Variable index, torch::optional optional_out, torch::optional dim_size) { ctx->saved_data["src_shape"] = src.sizes(); auto result = segment_coo_fw(src, index, optional_out, dim_size, "min"); auto out = std::get<0>(result); auto arg_out = std::get<1>(result).value(); ctx->save_for_backward({index, arg_out}); ctx->mark_non_differentiable({arg_out}); if (optional_out.has_value()) ctx->mark_dirty({optional_out.value()}); return {out, arg_out}; } static variable_list backward(AutogradContext *ctx, variable_list grad_outs) { auto grad_out = grad_outs[0]; auto saved = ctx->get_saved_variables(); auto index = saved[0]; auto arg_out = saved[1]; auto src_shape = list2vec(ctx->saved_data["src_shape"].toIntList()); src_shape[index.dim() - 1] += 1; auto grad_in = torch::zeros(src_shape, grad_out.options()); grad_in.scatter_(index.dim() - 1, arg_out, grad_out); grad_in = grad_in.narrow(index.dim() - 1, 0, src_shape[index.dim() - 1] - 1); return {grad_in, Variable(), Variable(), Variable()}; } }; class SegmentMaxCOO : public torch::autograd::Function { public: static variable_list forward(AutogradContext *ctx, Variable src, Variable index, torch::optional optional_out, torch::optional dim_size) { ctx->saved_data["src_shape"] = src.sizes(); auto result = segment_coo_fw(src, index, optional_out, dim_size, "max"); auto out = std::get<0>(result); auto arg_out = std::get<1>(result).value(); ctx->save_for_backward({index, arg_out}); ctx->mark_non_differentiable({arg_out}); if (optional_out.has_value()) ctx->mark_dirty({optional_out.value()}); return {out, arg_out}; } static variable_list backward(AutogradContext *ctx, variable_list grad_outs) { auto grad_out = grad_outs[0]; auto saved = ctx->get_saved_variables(); auto index = saved[0]; auto arg_out = saved[1]; auto src_shape = list2vec(ctx->saved_data["src_shape"].toIntList()); src_shape[index.dim() - 1] += 1; auto grad_in = torch::zeros(src_shape, grad_out.options()); grad_in.scatter_(index.dim() - 1, arg_out, grad_out); grad_in = grad_in.narrow(index.dim() - 1, 0, src_shape[index.dim() - 1] - 1); return {grad_in, Variable(), Variable(), Variable()}; } }; class GatherCOO : public torch::autograd::Function { public: static variable_list forward(AutogradContext *ctx, Variable src, Variable index, torch::optional optional_out) { ctx->saved_data["src_shape"] = src.sizes(); auto out = gather_coo_fw(src, index, optional_out); ctx->save_for_backward({index}); if (optional_out.has_value()) ctx->mark_dirty({optional_out.value()}); return {out}; } static variable_list backward(AutogradContext *ctx, variable_list grad_outs) { auto grad_out = grad_outs[0]; auto saved = ctx->get_saved_variables(); auto index = saved[0]; auto src_shape = list2vec(ctx->saved_data["src_shape"].toIntList()); auto grad_in = torch::zeros(src_shape, grad_out.options()); segment_coo_fw(grad_out, index, grad_in, torch::nullopt, "sum"); return {grad_in, Variable(), Variable()}; } }; SCATTER_API torch::Tensor segment_sum_coo(torch::Tensor src, torch::Tensor index, torch::optional optional_out, torch::optional dim_size) { return SegmentSumCOO::apply(src, index, optional_out, dim_size)[0]; } SCATTER_API torch::Tensor segment_mean_coo(torch::Tensor src, torch::Tensor index, torch::optional optional_out, torch::optional dim_size) { return SegmentMeanCOO::apply(src, index, optional_out, dim_size)[0]; } SCATTER_API std::tuple segment_min_coo(torch::Tensor src, torch::Tensor index, torch::optional optional_out, torch::optional dim_size) { auto result = SegmentMinCOO::apply(src, index, optional_out, dim_size); return std::make_tuple(result[0], result[1]); } SCATTER_API std::tuple segment_max_coo(torch::Tensor src, torch::Tensor index, torch::optional optional_out, torch::optional dim_size) { auto result = SegmentMaxCOO::apply(src, index, optional_out, dim_size); return std::make_tuple(result[0], result[1]); } SCATTER_API torch::Tensor gather_coo(torch::Tensor src, torch::Tensor index, torch::optional optional_out) { return GatherCOO::apply(src, index, optional_out)[0]; } static auto registry = torch::RegisterOperators() .op("torch_scatter::segment_sum_coo", &segment_sum_coo) .op("torch_scatter::segment_mean_coo", &segment_mean_coo) .op("torch_scatter::segment_min_coo", &segment_min_coo) .op("torch_scatter::segment_max_coo", &segment_max_coo) .op("torch_scatter::gather_coo", &gather_coo); pytorch_scatter-2.1.2/csrc/segment_csr.cpp000066400000000000000000000210731450760104200207020ustar00rootroot00000000000000#ifdef WITH_PYTHON #include #endif #include #include "cpu/segment_csr_cpu.h" #include "macros.h" #include "utils.h" #ifdef WITH_CUDA #include "cuda/segment_csr_cuda.h" #endif #ifdef _WIN32 #ifdef WITH_PYTHON #ifdef WITH_CUDA PyMODINIT_FUNC PyInit__segment_csr_cuda(void) { return NULL; } #else PyMODINIT_FUNC PyInit__segment_csr_cpu(void) { return NULL; } #endif #endif #endif std::tuple> segment_csr_fw(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out, std::string reduce) { if (src.device().is_cuda()) { #ifdef WITH_CUDA return segment_csr_cuda(src, indptr, optional_out, reduce); #else AT_ERROR("Not compiled with CUDA support"); #endif } else { return segment_csr_cpu(src, indptr, optional_out, reduce); } } torch::Tensor gather_csr_fw(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out) { if (src.device().is_cuda()) { #ifdef WITH_CUDA return gather_csr_cuda(src, indptr, optional_out); #else AT_ERROR("Not compiled with CUDA support"); #endif } else { return gather_csr_cpu(src, indptr, optional_out); } } using torch::autograd::AutogradContext; using torch::autograd::Variable; using torch::autograd::variable_list; class SegmentSumCSR : public torch::autograd::Function { public: static variable_list forward(AutogradContext *ctx, Variable src, Variable indptr, torch::optional optional_out) { ctx->saved_data["src_shape"] = src.sizes(); auto out = std::get<0>(segment_csr_fw(src, indptr, optional_out, "sum")); ctx->save_for_backward({indptr}); if (optional_out.has_value()) ctx->mark_dirty({optional_out.value()}); return {out}; } static variable_list backward(AutogradContext *ctx, variable_list grad_outs) { auto grad_out = grad_outs[0]; auto saved = ctx->get_saved_variables(); auto indptr = saved[0]; auto src_shape = list2vec(ctx->saved_data["src_shape"].toIntList()); auto grad_in = torch::empty(src_shape, grad_out.options()); gather_csr_fw(grad_out, indptr, grad_in); return {grad_in, Variable(), Variable()}; } }; class SegmentMeanCSR : public torch::autograd::Function { public: static variable_list forward(AutogradContext *ctx, Variable src, Variable indptr, torch::optional optional_out) { ctx->saved_data["src_shape"] = src.sizes(); auto out = std::get<0>(segment_csr_fw(src, indptr, optional_out, "mean")); ctx->save_for_backward({indptr}); if (optional_out.has_value()) ctx->mark_dirty({optional_out.value()}); return {out}; } static variable_list backward(AutogradContext *ctx, variable_list grad_outs) { auto grad_out = grad_outs[0]; auto saved = ctx->get_saved_variables(); auto indptr = saved[0]; auto src_shape = list2vec(ctx->saved_data["src_shape"].toIntList()); auto grad_in = torch::empty(src_shape, grad_out.options()); if (grad_in.numel() > 0) { gather_csr_fw(grad_out, indptr, grad_in); auto indptr1 = indptr.narrow(-1, 0, indptr.size(-1) - 1); auto indptr2 = indptr.narrow(-1, 1, indptr.size(-1) - 1); auto count = (indptr2 - indptr1).to(grad_in.options()); count = gather_csr_fw(count, indptr, torch::nullopt); for (auto i = 0; i < grad_out.dim() - indptr.dim(); i++) count = count.unsqueeze(-1); grad_in.true_divide_(count); } return {grad_in, Variable(), Variable()}; } }; class SegmentMinCSR : public torch::autograd::Function { public: static variable_list forward(AutogradContext *ctx, Variable src, Variable indptr, torch::optional optional_out) { ctx->saved_data["src_shape"] = src.sizes(); auto result = segment_csr_fw(src, indptr, optional_out, "min"); auto out = std::get<0>(result); auto arg_out = std::get<1>(result).value(); ctx->save_for_backward({indptr, arg_out}); ctx->mark_non_differentiable({arg_out}); if (optional_out.has_value()) ctx->mark_dirty({optional_out.value()}); return {out, arg_out}; } static variable_list backward(AutogradContext *ctx, variable_list grad_outs) { auto grad_out = grad_outs[0]; auto saved = ctx->get_saved_variables(); auto indptr = saved[0]; auto arg_out = saved[1]; auto src_shape = list2vec(ctx->saved_data["src_shape"].toIntList()); src_shape[indptr.dim() - 1] += 1; auto grad_in = torch::zeros(src_shape, grad_out.options()); grad_in.scatter_(indptr.dim() - 1, arg_out, grad_out); grad_in = grad_in.narrow(indptr.dim() - 1, 0, src_shape[indptr.dim() - 1] - 1); return {grad_in, Variable(), Variable()}; } }; class SegmentMaxCSR : public torch::autograd::Function { public: static variable_list forward(AutogradContext *ctx, Variable src, Variable indptr, torch::optional optional_out) { ctx->saved_data["src_shape"] = src.sizes(); auto result = segment_csr_fw(src, indptr, optional_out, "max"); auto out = std::get<0>(result); auto arg_out = std::get<1>(result).value(); ctx->save_for_backward({indptr, arg_out}); ctx->mark_non_differentiable({arg_out}); if (optional_out.has_value()) ctx->mark_dirty({optional_out.value()}); return {out, arg_out}; } static variable_list backward(AutogradContext *ctx, variable_list grad_outs) { auto grad_out = grad_outs[0]; auto saved = ctx->get_saved_variables(); auto indptr = saved[0]; auto arg_out = saved[1]; auto src_shape = list2vec(ctx->saved_data["src_shape"].toIntList()); src_shape[indptr.dim() - 1] += 1; auto grad_in = torch::zeros(src_shape, grad_out.options()); grad_in.scatter_(indptr.dim() - 1, arg_out, grad_out); grad_in = grad_in.narrow(indptr.dim() - 1, 0, src_shape[indptr.dim() - 1] - 1); return {grad_in, Variable(), Variable()}; } }; class GatherCSR : public torch::autograd::Function { public: static variable_list forward(AutogradContext *ctx, Variable src, Variable indptr, torch::optional optional_out) { ctx->saved_data["src_shape"] = src.sizes(); auto out = gather_csr_fw(src, indptr, optional_out); ctx->save_for_backward({indptr}); if (optional_out.has_value()) ctx->mark_dirty({optional_out.value()}); return {out}; } static variable_list backward(AutogradContext *ctx, variable_list grad_outs) { auto grad_out = grad_outs[0]; auto saved = ctx->get_saved_variables(); auto indptr = saved[0]; auto src_shape = list2vec(ctx->saved_data["src_shape"].toIntList()); auto grad_in = torch::empty(src_shape, grad_out.options()); segment_csr_fw(grad_out, indptr, grad_in, "sum"); return {grad_in, Variable(), Variable()}; } }; SCATTER_API torch::Tensor segment_sum_csr(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out) { return SegmentSumCSR::apply(src, indptr, optional_out)[0]; } SCATTER_API torch::Tensor segment_mean_csr(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out) { return SegmentMeanCSR::apply(src, indptr, optional_out)[0]; } SCATTER_API std::tuple segment_min_csr(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out) { auto result = SegmentMinCSR::apply(src, indptr, optional_out); return std::make_tuple(result[0], result[1]); } SCATTER_API std::tuple segment_max_csr(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out) { auto result = SegmentMaxCSR::apply(src, indptr, optional_out); return std::make_tuple(result[0], result[1]); } SCATTER_API torch::Tensor gather_csr(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out) { return GatherCSR::apply(src, indptr, optional_out)[0]; } static auto registry = torch::RegisterOperators() .op("torch_scatter::segment_sum_csr", &segment_sum_csr) .op("torch_scatter::segment_mean_csr", &segment_mean_csr) .op("torch_scatter::segment_min_csr", &segment_min_csr) .op("torch_scatter::segment_max_csr", &segment_max_csr) .op("torch_scatter::gather_csr", &gather_csr); pytorch_scatter-2.1.2/csrc/utils.h000066400000000000000000000004341450760104200171740ustar00rootroot00000000000000#pragma once #include #include inline std::vector list2vec(const c10::List list) { std::vector result; result.reserve(list.size()); for (size_t i = 0; i < list.size(); i++) result.push_back(list[i]); return result; } pytorch_scatter-2.1.2/csrc/version.cpp000066400000000000000000000013561450760104200200600ustar00rootroot00000000000000#ifdef WITH_PYTHON #include #endif #include #include "scatter.h" #include "macros.h" #ifdef WITH_CUDA #ifdef USE_ROCM #include #else #include #endif #endif #ifdef _WIN32 #ifdef WITH_PYTHON #ifdef WITH_CUDA PyMODINIT_FUNC PyInit__version_cuda(void) { return NULL; } #else PyMODINIT_FUNC PyInit__version_cpu(void) { return NULL; } #endif #endif #endif namespace scatter { SCATTER_API int64_t cuda_version() noexcept { #ifdef WITH_CUDA #ifdef USE_ROCM return HIP_VERSION; #else return CUDA_VERSION; #endif #else return -1; #endif } } // namespace scatter static auto registry = torch::RegisterOperators().op( "torch_scatter::cuda_version", [] { return scatter::cuda_version(); }); pytorch_scatter-2.1.2/docs/000077500000000000000000000000001450760104200156605ustar00rootroot00000000000000pytorch_scatter-2.1.2/docs/.nojekyll000066400000000000000000000000001450760104200174760ustar00rootroot00000000000000pytorch_scatter-2.1.2/docs/Makefile000066400000000000000000000002701450760104200173170ustar00rootroot00000000000000SPHINXBUILD := sphinx-build SPHINXPROJ := pytorch_scatter SOURCEDIR := source BUILDDIR := build .PHONY: help Makefile %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" pytorch_scatter-2.1.2/docs/index.html000066400000000000000000000002411450760104200176520ustar00rootroot00000000000000 Redirect pytorch_scatter-2.1.2/docs/requirements.txt000066400000000000000000000001561450760104200211460ustar00rootroot00000000000000https://download.pytorch.org/whl/cpu/torch-1.11.0%2Bcpu-cp38-cp38-linux_x86_64.whl sphinx>=3 sphinx_rtd_theme pytorch_scatter-2.1.2/docs/source/000077500000000000000000000000001450760104200171605ustar00rootroot00000000000000pytorch_scatter-2.1.2/docs/source/_figures/000077500000000000000000000000001450760104200207635ustar00rootroot00000000000000pytorch_scatter-2.1.2/docs/source/_figures/add.svg000066400000000000000000001301771450760104200222450ustar00rootroot00000000000000 pytorch_scatter-2.1.2/docs/source/_figures/add.tex000066400000000000000000000003541450760104200222370ustar00rootroot00000000000000\def\indices{{0, 0, 1, 0, 2, 2, 3, 3}} \def\inputs{{5, 1, 7, 2, 3, 2, 1, 3}} \def\outputs{{8, 7, 5, 4}} \def\colors{{"cyan", "orange", "olive", "magenta"}} \def\numberInputs{7} \def\numberOutputs{3} \def\operation{add} \input{template} pytorch_scatter-2.1.2/docs/source/_figures/build.sh000077500000000000000000000002171450760104200224210ustar00rootroot00000000000000#!/bin/bash files=(add sub mul div mean max min std) for name in "${files[@]}"; do pdflatex "$name" pdf2svg "$name.pdf" "$name.svg" done pytorch_scatter-2.1.2/docs/source/_figures/div.svg000066400000000000000000001411731450760104200222750ustar00rootroot00000000000000 pytorch_scatter-2.1.2/docs/source/_figures/div.tex000066400000000000000000000004451450760104200222720ustar00rootroot00000000000000\def\indices{{0, 0, 1, 0, 2, 2, 3, 3}} \def\inputs{{5, 1, 7, 2, 3, 2, 1, 3}} \def\outputs{{"$\frac{1}{10}$", "$\frac{1}{7}$", "$\frac{1}{6}$", "$\frac{1}{3}$"}} \def\colors{{"cyan", "orange", "olive", "magenta"}} \def\numberInputs{7} \def\numberOutputs{3} \def\operation{div} \input{template} pytorch_scatter-2.1.2/docs/source/_figures/max.svg000066400000000000000000001303121450760104200222710ustar00rootroot00000000000000 pytorch_scatter-2.1.2/docs/source/_figures/max.tex000066400000000000000000000003541450760104200222740ustar00rootroot00000000000000\def\indices{{0, 0, 1, 0, 2, 2, 3, 3}} \def\inputs{{5, 1, 7, 2, 3, 2, 1, 3}} \def\outputs{{5, 7, 3, 3}} \def\colors{{"cyan", "orange", "olive", "magenta"}} \def\numberInputs{7} \def\numberOutputs{3} \def\operation{max} \input{template} pytorch_scatter-2.1.2/docs/source/_figures/mean.svg000066400000000000000000001516231450760104200224340ustar00rootroot00000000000000 pytorch_scatter-2.1.2/docs/source/_figures/mean.tex000066400000000000000000000004451450760104200224300ustar00rootroot00000000000000\def\indices{{0, 0, 1, 0, 2, 2, 3, 3}} \def\inputs{{5, 1, 7, 2, 3, 2, 1, 3}} \def\outputs{{"$\frac{8}{3}$", "$\frac{7}{1}$", "$\frac{5}{2}$", "$\frac{4}{2}$"}} \def\colors{{"cyan", "orange", "olive", "magenta"}} \def\numberInputs{7} \def\numberOutputs{3} \def\operation{mean} \input{template} pytorch_scatter-2.1.2/docs/source/_figures/min.svg000066400000000000000000001301151450760104200222700ustar00rootroot00000000000000 pytorch_scatter-2.1.2/docs/source/_figures/min.tex000066400000000000000000000004201450760104200222640ustar00rootroot00000000000000\def\indices{{0, 0, 1, 0, 2, 2, 3, 3}} \def\inputs{{"-5", "-1", "-7", "-2", "-3", "-2", "-1", "-3"}} \def\outputs{{"-5", "-7", "-3", "-3"}} \def\colors{{"cyan", "orange", "olive", "magenta"}} \def\numberInputs{7} \def\numberOutputs{3} \def\operation{min} \input{template} pytorch_scatter-2.1.2/docs/source/_figures/mul.svg000066400000000000000000001300111450760104200222750ustar00rootroot00000000000000 pytorch_scatter-2.1.2/docs/source/_figures/mul.tex000066400000000000000000000003551450760104200223050ustar00rootroot00000000000000\def\indices{{0, 0, 1, 0, 2, 2, 3, 3}} \def\inputs{{5, 1, 7, 2, 3, 2, 1, 3}} \def\outputs{{10, 7, 6, 3}} \def\colors{{"cyan", "orange", "olive", "magenta"}} \def\numberInputs{7} \def\numberOutputs{3} \def\operation{mul} \input{template} pytorch_scatter-2.1.2/docs/source/_figures/segment_coo.svg000066400000000000000000001277241450760104200240230ustar00rootroot00000000000000 pytorch_scatter-2.1.2/docs/source/_figures/segment_coo.tex000066400000000000000000000003541450760104200240110ustar00rootroot00000000000000\def\indices{{0, 0, 0, 1, 2, 2, 3, 3}} \def\inputs{{5, 1, 2, 7, 3, 2, 1, 3}} \def\outputs{{8, 7, 5, 4}} \def\colors{{"cyan", "orange", "olive", "magenta"}} \def\numberInputs{7} \def\numberOutputs{3} \def\operation{add} \input{template} pytorch_scatter-2.1.2/docs/source/_figures/std.svg000066400000000000000000001306751450760104200223120ustar00rootroot00000000000000 pytorch_scatter-2.1.2/docs/source/_figures/std.tex000066400000000000000000000003621450760104200223000ustar00rootroot00000000000000\def\indices{{0, 0, 1, 0, 2, 2, 3, 3}} \def\inputs{{5, 1, 7, 2, 3, 2, 1, 3}} \def\outputs{{2.1, 0, 0.7, 1.4}} \def\colors{{"cyan", "orange", "olive", "magenta"}} \def\numberInputs{7} \def\numberOutputs{3} \def\operation{std} \input{template} pytorch_scatter-2.1.2/docs/source/_figures/sub.svg000066400000000000000000001324601450760104200223030ustar00rootroot00000000000000 pytorch_scatter-2.1.2/docs/source/_figures/sub.tex000066400000000000000000000003701450760104200222760ustar00rootroot00000000000000\def\indices{{0, 0, 1, 0, 2, 2, 3, 3}} \def\inputs{{5, 1, 7, 2, 3, 2, 1, 3}} \def\outputs{{"-8", "-7", "-5", "-4"}} \def\colors{{"cyan", "orange", "olive", "magenta"}} \def\numberInputs{7} \def\numberOutputs{3} \def\operation{sub} \input{template} pytorch_scatter-2.1.2/docs/source/_figures/template.tex000066400000000000000000000027441450760104200233270ustar00rootroot00000000000000\documentclass[class=minimal]{standalone} \usepackage{tikz} \usetikzlibrary{shapes.geometric} \newcommand{\tiny}[1]{\scalebox{0.45}{#1}} \begin{document} \begin{tikzpicture} \tikzstyle{title}=[text width=1.1cm, inner sep=0pt] \tikzstyle{square}=[rectangle, draw, minimum width=0.5cm, minimum height=0.5cm, inner sep=0pt, fill opacity=0.5, text opacity=1] \tikzstyle{op}=[ellipse, draw, inner sep=-1pt, minimum height=9pt, minimum width=12pt] \tikzstyle{edge}=[->] \tikzstyle{round}=[out=-90, in=90, looseness=0.85] \node[title] at (-0.8, 2.2) {index}; \node[title] at (-0.8, 1.5) {input}; \foreach \i in {0,...,\numberInputs} { \pgfmathparse{\indices[\i]}\let\idx\pgfmathresult \pgfmathparse{\inputs[\i]}\let\in\pgfmathresult \pgfmathparse{\colors[\idx]}\let\co\pgfmathresult \node[square] (index\i) at (\i * 0.5, 2.2) {\idx}; \node[square, fill=\co] (input\i) at (\i * 0.5, 1.5) {\in}; \draw[edge] (index\i) -- (input\i); } \node[title] at (-0.8, 0.0) {out}; \foreach \i in {0,...,\numberOutputs} { \pgfmathparse{\outputs[\i]}\let\out\pgfmathresult \pgfmathparse{\colors[\i]}\let\co\pgfmathresult \def \x{(\numberInputs - \numberOutputs) * 0.25 + \i * 0.5} \node[op] (op\i) at ({\x}, 0.6) {\tiny{\operation}}; \node[square, fill=\co] (output\i) at ({\x}, 0.0) {\out}; \draw[edge] (op\i) -- (output\i); } \foreach \i in {0,...,\numberInputs} { \pgfmathparse{\indices[\i]}\let\idx\pgfmathresult \draw[edge] (input\i) to[round] (op\idx); } \end{tikzpicture} \end{document} pytorch_scatter-2.1.2/docs/source/conf.py000066400000000000000000000012531450760104200204600ustar00rootroot00000000000000import datetime import doctest import sphinx_rtd_theme import torch_scatter extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'sphinx.ext.mathjax', 'sphinx.ext.napoleon', 'sphinx.ext.viewcode', ] source_suffix = '.rst' master_doc = 'index' author = 'Matthias Fey' project = 'pytorch_scatter' copyright = '{}, {}'.format(datetime.datetime.now().year, author) version = torch_scatter.__version__ release = torch_scatter.__version__ html_theme = 'sphinx_rtd_theme' html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] doctest_default_flags = doctest.NORMALIZE_WHITESPACE intersphinx_mapping = {'python': ('https://docs.python.org/', None)} pytorch_scatter-2.1.2/docs/source/functions/000077500000000000000000000000001450760104200211705ustar00rootroot00000000000000pytorch_scatter-2.1.2/docs/source/functions/composite.rst000066400000000000000000000002111450760104200237160ustar00rootroot00000000000000composite ========= .. currentmodule:: torch_scatter.composite .. automodule:: torch_scatter.composite :members: :undoc-members: pytorch_scatter-2.1.2/docs/source/functions/scatter.rst000066400000000000000000000001271450760104200233670ustar00rootroot00000000000000Scatter ======= .. automodule:: torch_scatter :noindex: .. autofunction:: scatter pytorch_scatter-2.1.2/docs/source/functions/segment_coo.rst000066400000000000000000000001431450760104200242220ustar00rootroot00000000000000Segment COO =========== .. automodule:: torch_scatter :noindex: .. autofunction:: segment_coo pytorch_scatter-2.1.2/docs/source/functions/segment_csr.rst000066400000000000000000000001431450760104200242310ustar00rootroot00000000000000Segment CSR =========== .. automodule:: torch_scatter :noindex: .. autofunction:: segment_csr pytorch_scatter-2.1.2/docs/source/index.rst000066400000000000000000000017411450760104200210240ustar00rootroot00000000000000:github_url: https://github.com/rusty1s/pytorch_scatter PyTorch Scatter Documentation ============================= This package consists of a small extension library of highly optimized sparse update (scatter and segment) operations for the use in `PyTorch `_, which are missing in the main package. Scatter and segment operations can be roughly described as reduce operations based on a given "group-index" tensor. Segment operations require the "group-index" tensor to be sorted, whereas scatter operations are not subject to these requirements. All included operations are broadcastable, work on varying data types, are implemented both for CPU and GPU with corresponding backward implementations, and are fully traceable. .. toctree:: :glob: :maxdepth: 0 :caption: Package reference functions/scatter functions/segment_coo functions/segment_csr functions/composite Indices and tables ================== * :ref:`genindex` * :ref:`modindex` pytorch_scatter-2.1.2/readthedocs.yml000066400000000000000000000003061450760104200177370ustar00rootroot00000000000000version: 2 build: image: latest python: version: 3.8 system_packages: true install: - requirements: docs/requirements.txt - method: setuptools path: . formats: [] pytorch_scatter-2.1.2/setup.cfg000066400000000000000000000007761450760104200165630ustar00rootroot00000000000000[metadata] long_description=file: README.md long_description_content_type=text/markdown classifiers = Development Status :: 5 - Production/Stable License :: OSI Approved :: MIT License Programming Language :: Python Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 Programming Language :: Python :: 3 :: Only [aliases] test = pytest [tool:pytest] addopts = --capture=no pytorch_scatter-2.1.2/setup.py000066400000000000000000000111101450760104200164340ustar00rootroot00000000000000import glob import os import os.path as osp import platform import sys from itertools import product import torch from setuptools import find_packages, setup from torch.__config__ import parallel_info from torch.utils.cpp_extension import (CUDA_HOME, BuildExtension, CppExtension, CUDAExtension) __version__ = '2.1.2' URL = 'https://github.com/rusty1s/pytorch_scatter' WITH_CUDA = False if torch.cuda.is_available(): WITH_CUDA = CUDA_HOME is not None or torch.version.hip suffices = ['cpu', 'cuda'] if WITH_CUDA else ['cpu'] if os.getenv('FORCE_CUDA', '0') == '1': suffices = ['cuda', 'cpu'] if os.getenv('FORCE_ONLY_CUDA', '0') == '1': suffices = ['cuda'] if os.getenv('FORCE_ONLY_CPU', '0') == '1': suffices = ['cpu'] BUILD_DOCS = os.getenv('BUILD_DOCS', '0') == '1' WITH_SYMBOLS = os.getenv('WITH_SYMBOLS', '0') == '1' def get_extensions(): extensions = [] extensions_dir = osp.join('csrc') main_files = glob.glob(osp.join(extensions_dir, '*.cpp')) # remove generated 'hip' files, in case of rebuilds main_files = [path for path in main_files if 'hip' not in path] for main, suffix in product(main_files, suffices): define_macros = [('WITH_PYTHON', None)] undef_macros = [] if sys.platform == 'win32': define_macros += [('torchscatter_EXPORTS', None)] extra_compile_args = {'cxx': ['-O3']} if not os.name == 'nt': # Not on Windows: extra_compile_args['cxx'] += ['-Wno-sign-compare'] extra_link_args = [] if WITH_SYMBOLS else ['-s'] info = parallel_info() if ('backend: OpenMP' in info and 'OpenMP not found' not in info and sys.platform != 'darwin'): extra_compile_args['cxx'] += ['-DAT_PARALLEL_OPENMP'] if sys.platform == 'win32': extra_compile_args['cxx'] += ['/openmp'] else: extra_compile_args['cxx'] += ['-fopenmp'] else: print('Compiling without OpenMP...') # Compile for mac arm64 if (sys.platform == 'darwin' and platform.machine() == 'arm64'): extra_compile_args['cxx'] += ['-arch', 'arm64'] extra_link_args += ['-arch', 'arm64'] if suffix == 'cuda': define_macros += [('WITH_CUDA', None)] nvcc_flags = os.getenv('NVCC_FLAGS', '') nvcc_flags = [] if nvcc_flags == '' else nvcc_flags.split(' ') nvcc_flags += ['-O3'] if torch.version.hip: # USE_ROCM was added to later versions of PyTorch. # Define here to support older PyTorch versions as well: define_macros += [('USE_ROCM', None)] undef_macros += ['__HIP_NO_HALF_CONVERSIONS__'] else: nvcc_flags += ['--expt-relaxed-constexpr'] extra_compile_args['nvcc'] = nvcc_flags name = main.split(os.sep)[-1][:-4] sources = [main] path = osp.join(extensions_dir, 'cpu', f'{name}_cpu.cpp') if osp.exists(path): sources += [path] path = osp.join(extensions_dir, 'cuda', f'{name}_cuda.cu') if suffix == 'cuda' and osp.exists(path): sources += [path] Extension = CppExtension if suffix == 'cpu' else CUDAExtension extension = Extension( f'torch_scatter._{name}_{suffix}', sources, include_dirs=[extensions_dir], define_macros=define_macros, undef_macros=undef_macros, extra_compile_args=extra_compile_args, extra_link_args=extra_link_args, ) extensions += [extension] return extensions install_requires = [] test_requires = [ 'pytest', 'pytest-cov', ] # work-around hipify abs paths include_package_data = True if torch.cuda.is_available() and torch.version.hip: include_package_data = False setup( name='torch_scatter', version=__version__, description='PyTorch Extension Library of Optimized Scatter Operations', author='Matthias Fey', author_email='matthias.fey@tu-dortmund.de', url=URL, download_url=f'{URL}/archive/{__version__}.tar.gz', keywords=['pytorch', 'scatter', 'segment', 'gather'], python_requires='>=3.8', install_requires=install_requires, extras_require={ 'test': test_requires, }, ext_modules=get_extensions() if not BUILD_DOCS else [], cmdclass={ 'build_ext': BuildExtension.with_options(no_python_abi_suffix=True, use_ninja=False) }, packages=find_packages(), include_package_data=include_package_data, ) pytorch_scatter-2.1.2/test/000077500000000000000000000000001450760104200157075ustar00rootroot00000000000000pytorch_scatter-2.1.2/test/composite/000077500000000000000000000000001450760104200177115ustar00rootroot00000000000000pytorch_scatter-2.1.2/test/composite/test_logsumexp.py000066400000000000000000000013611450760104200233460ustar00rootroot00000000000000import torch from torch_scatter import scatter_logsumexp def test_logsumexp(): inputs = torch.tensor([ 0.5, 0.5, 0.0, -2.1, 3.2, 7.0, -1.0, -100.0, ]) inputs.requires_grad_() index = torch.tensor([0, 0, 1, 1, 1, 2, 4, 4]) splits = [2, 3, 1, 0, 2] outputs = scatter_logsumexp(inputs, index) for src, out in zip(inputs.split(splits), outputs.unbind()): if src.numel() > 0: assert out.tolist() == torch.logsumexp(src, dim=0).tolist() else: assert out.item() == 0.0 outputs.backward(torch.randn_like(outputs)) jit = torch.jit.script(scatter_logsumexp) assert jit(inputs, index).tolist() == outputs.tolist() pytorch_scatter-2.1.2/test/composite/test_softmax.py000066400000000000000000000031621450760104200230050ustar00rootroot00000000000000import torch from torch_scatter import scatter_log_softmax, scatter_softmax def test_softmax(): src = torch.tensor([0.2, 0, 0.2, -2.1, 3.2, 7, -1, float('-inf')]) src.requires_grad_() index = torch.tensor([0, 1, 0, 1, 1, 2, 4, 4]) out = scatter_softmax(src, index) out0 = torch.softmax(torch.tensor([0.2, 0.2]), dim=-1) out1 = torch.softmax(torch.tensor([0, -2.1, 3.2]), dim=-1) out2 = torch.softmax(torch.tensor([7], dtype=torch.float), dim=-1) out4 = torch.softmax(torch.tensor([-1, float('-inf')]), dim=-1) expected = torch.stack([ out0[0], out1[0], out0[1], out1[1], out1[2], out2[0], out4[0], out4[1] ], dim=0) assert torch.allclose(out, expected) out.backward(torch.randn_like(out)) jit = torch.jit.script(scatter_softmax) assert jit(src, index).tolist() == out.tolist() def test_log_softmax(): src = torch.tensor([0.2, 0, 0.2, -2.1, 3.2, 7, -1, float('-inf')]) src.requires_grad_() index = torch.tensor([0, 1, 0, 1, 1, 2, 4, 4]) out = scatter_log_softmax(src, index) out0 = torch.log_softmax(torch.tensor([0.2, 0.2]), dim=-1) out1 = torch.log_softmax(torch.tensor([0, -2.1, 3.2]), dim=-1) out2 = torch.log_softmax(torch.tensor([7], dtype=torch.float), dim=-1) out4 = torch.log_softmax(torch.tensor([-1, float('-inf')]), dim=-1) expected = torch.stack([ out0[0], out1[0], out0[1], out1[1], out1[2], out2[0], out4[0], out4[1] ], dim=0) assert torch.allclose(out, expected) out.backward(torch.randn_like(out)) jit = torch.jit.script(scatter_log_softmax) assert jit(src, index).tolist() == out.tolist() pytorch_scatter-2.1.2/test/composite/test_std.py000066400000000000000000000011311450760104200221100ustar00rootroot00000000000000import torch from torch_scatter import scatter_std def test_std(): src = torch.tensor([[2, 0, 1, 4, 3], [0, 2, 1, 3, 4]], dtype=torch.float) src.requires_grad_() index = torch.tensor([[0, 0, 0, 0, 0], [1, 1, 1, 1, 1]], dtype=torch.long) out = scatter_std(src, index, dim=-1, unbiased=True) std = src.std(dim=-1, unbiased=True)[0] expected = torch.tensor([[std, 0], [0, std]]) assert torch.allclose(out, expected) out.backward(torch.randn_like(out)) jit = torch.jit.script(scatter_std) assert jit(src, index, dim=-1, unbiased=True).tolist() == out.tolist() pytorch_scatter-2.1.2/test/test_broadcasting.py000066400000000000000000000016601450760104200217630ustar00rootroot00000000000000from itertools import product import pytest import torch from torch_scatter import scatter from torch_scatter.testing import devices, reductions @pytest.mark.parametrize('reduce,device', product(reductions, devices)) def test_broadcasting(reduce, device): B, C, H, W = (4, 3, 8, 8) src = torch.randn((B, C, H, W), device=device) index = torch.randint(0, H, (H, )).to(device, torch.long) out = scatter(src, index, dim=2, dim_size=H, reduce=reduce) assert out.size() == (B, C, H, W) src = torch.randn((B, C, H, W), device=device) index = torch.randint(0, H, (B, 1, H, W)).to(device, torch.long) out = scatter(src, index, dim=2, dim_size=H, reduce=reduce) assert out.size() == (B, C, H, W) src = torch.randn((B, C, H, W), device=device) index = torch.randint(0, H, (H, )).to(device, torch.long) out = scatter(src, index, dim=2, dim_size=H, reduce=reduce) assert out.size() == (B, C, H, W) pytorch_scatter-2.1.2/test/test_gather.py000066400000000000000000000070421450760104200205750ustar00rootroot00000000000000from itertools import product import pytest import torch from torch.autograd import gradcheck from torch_scatter import gather_coo, gather_csr from torch_scatter.testing import devices, dtypes, tensor tests = [ { 'src': [1, 2, 3, 4], 'index': [0, 0, 1, 1, 1, 3], 'indptr': [0, 2, 5, 5, 6], 'expected': [1, 1, 2, 2, 2, 4], }, { 'src': [[1, 2], [3, 4], [5, 6], [7, 8]], 'index': [0, 0, 1, 1, 1, 3], 'indptr': [0, 2, 5, 5, 6], 'expected': [[1, 2], [1, 2], [3, 4], [3, 4], [3, 4], [7, 8]] }, { 'src': [[1, 3, 5, 7], [2, 4, 6, 8]], 'index': [[0, 0, 1, 1, 1, 3], [0, 0, 0, 1, 1, 2]], 'indptr': [[0, 2, 5, 5, 6], [0, 3, 5, 6, 6]], 'expected': [[1, 1, 3, 3, 3, 7], [2, 2, 2, 4, 4, 6]], }, { 'src': [[[1, 2], [3, 4], [5, 6]], [[7, 9], [10, 11], [12, 13]]], 'index': [[0, 0, 1], [0, 2, 2]], 'indptr': [[0, 2, 3, 3], [0, 1, 1, 3]], 'expected': [[[1, 2], [1, 2], [3, 4]], [[7, 9], [12, 13], [12, 13]]], }, { 'src': [[1], [2]], 'index': [[0, 0], [0, 0]], 'indptr': [[0, 2], [0, 2]], 'expected': [[1, 1], [2, 2]], }, { 'src': [[[1, 1]], [[2, 2]]], 'index': [[0, 0], [0, 0]], 'indptr': [[0, 2], [0, 2]], 'expected': [[[1, 1], [1, 1]], [[2, 2], [2, 2]]], }, ] @pytest.mark.parametrize('test,dtype,device', product(tests, dtypes, devices)) def test_forward(test, dtype, device): src = tensor(test['src'], dtype, device) index = tensor(test['index'], torch.long, device) indptr = tensor(test['indptr'], torch.long, device) expected = tensor(test['expected'], dtype, device) out = gather_csr(src, indptr) assert torch.all(out == expected) out = gather_coo(src, index) assert torch.all(out == expected) @pytest.mark.parametrize('test,device', product(tests, devices)) def test_backward(test, device): src = tensor(test['src'], torch.double, device) src.requires_grad_() index = tensor(test['index'], torch.long, device) indptr = tensor(test['indptr'], torch.long, device) assert gradcheck(gather_csr, (src, indptr, None)) is True assert gradcheck(gather_coo, (src, index, None)) is True @pytest.mark.parametrize('test,dtype,device', product(tests, dtypes, devices)) def test_out(test, dtype, device): src = tensor(test['src'], dtype, device) index = tensor(test['index'], torch.long, device) indptr = tensor(test['indptr'], torch.long, device) expected = tensor(test['expected'], dtype, device) size = list(src.size()) size[index.dim() - 1] = index.size(-1) out = src.new_full(size, -2) gather_csr(src, indptr, out) assert torch.all(out == expected) out.fill_(-2) gather_coo(src, index, out) assert torch.all(out == expected) @pytest.mark.parametrize('test,dtype,device', product(tests, dtypes, devices)) def test_non_contiguous(test, dtype, device): src = tensor(test['src'], dtype, device) index = tensor(test['index'], torch.long, device) indptr = tensor(test['indptr'], torch.long, device) expected = tensor(test['expected'], dtype, device) if src.dim() > 1: src = src.transpose(0, 1).contiguous().transpose(0, 1) if index.dim() > 1: index = index.transpose(0, 1).contiguous().transpose(0, 1) if indptr.dim() > 1: indptr = indptr.transpose(0, 1).contiguous().transpose(0, 1) out = gather_csr(src, indptr) assert torch.all(out == expected) out = gather_coo(src, index) assert torch.all(out == expected) pytorch_scatter-2.1.2/test/test_multi_gpu.py000066400000000000000000000025041450760104200213260ustar00rootroot00000000000000from itertools import product import pytest import torch import torch_scatter from torch_scatter.testing import dtypes, reductions, tensor tests = [ { 'src': [1, 2, 3, 4, 5, 6], 'index': [0, 0, 1, 1, 1, 3], 'indptr': [0, 2, 5, 5, 6], 'dim': 0, 'sum': [3, 12, 0, 6], 'add': [3, 12, 0, 6], 'mean': [1.5, 4, 0, 6], 'min': [1, 3, 0, 6], 'max': [2, 5, 0, 6], }, ] @pytest.mark.skipif(not torch.cuda.is_available(), reason='CUDA not available') @pytest.mark.skipif(torch.cuda.device_count() < 2, reason='No multiple GPUS') @pytest.mark.parametrize('test,reduce,dtype', product(tests, reductions, dtypes)) def test_forward(test, reduce, dtype): device = torch.device('cuda:1') src = tensor(test['src'], dtype, device) index = tensor(test['index'], torch.long, device) indptr = tensor(test['indptr'], torch.long, device) dim = test['dim'] expected = tensor(test[reduce], dtype, device) out = torch_scatter.scatter(src, index, dim, reduce=reduce) assert torch.all(out == expected) out = torch_scatter.segment_coo(src, index, reduce=reduce) assert torch.all(out == expected) out = torch_scatter.segment_csr(src, indptr, reduce=reduce) assert torch.all(out == expected) pytorch_scatter-2.1.2/test/test_scatter.py000066400000000000000000000140341450760104200207670ustar00rootroot00000000000000from itertools import product import pytest import torch import torch_scatter from torch.autograd import gradcheck from torch_scatter.testing import devices, dtypes, reductions, tensor reductions = reductions + ['mul'] tests = [ { 'src': [1, 3, 2, 4, 5, 6], 'index': [0, 1, 0, 1, 1, 3], 'dim': -1, 'sum': [3, 12, 0, 6], 'add': [3, 12, 0, 6], 'mul': [2, 60, 1, 6], 'mean': [1.5, 4, 0, 6], 'min': [1, 3, 0, 6], 'arg_min': [0, 1, 6, 5], 'max': [2, 5, 0, 6], 'arg_max': [2, 4, 6, 5], }, { 'src': [[1, 2], [5, 6], [3, 4], [7, 8], [9, 10], [11, 12]], 'index': [0, 1, 0, 1, 1, 3], 'dim': 0, 'sum': [[4, 6], [21, 24], [0, 0], [11, 12]], 'add': [[4, 6], [21, 24], [0, 0], [11, 12]], 'mul': [[1 * 3, 2 * 4], [5 * 7 * 9, 6 * 8 * 10], [1, 1], [11, 12]], 'mean': [[2, 3], [7, 8], [0, 0], [11, 12]], 'min': [[1, 2], [5, 6], [0, 0], [11, 12]], 'arg_min': [[0, 0], [1, 1], [6, 6], [5, 5]], 'max': [[3, 4], [9, 10], [0, 0], [11, 12]], 'arg_max': [[2, 2], [4, 4], [6, 6], [5, 5]], }, { 'src': [[1, 5, 3, 7, 9, 11], [2, 4, 8, 6, 10, 12]], 'index': [[0, 1, 0, 1, 1, 3], [0, 0, 1, 0, 1, 2]], 'dim': 1, 'sum': [[4, 21, 0, 11], [12, 18, 12, 0]], 'add': [[4, 21, 0, 11], [12, 18, 12, 0]], 'mul': [[1 * 3, 5 * 7 * 9, 1, 11], [2 * 4 * 6, 8 * 10, 12, 1]], 'mean': [[2, 7, 0, 11], [4, 9, 12, 0]], 'min': [[1, 5, 0, 11], [2, 8, 12, 0]], 'arg_min': [[0, 1, 6, 5], [0, 2, 5, 6]], 'max': [[3, 9, 0, 11], [6, 10, 12, 0]], 'arg_max': [[2, 4, 6, 5], [3, 4, 5, 6]], }, { 'src': [[[1, 2], [5, 6], [3, 4]], [[10, 11], [7, 9], [12, 13]]], 'index': [[0, 1, 0], [2, 0, 2]], 'dim': 1, 'sum': [[[4, 6], [5, 6], [0, 0]], [[7, 9], [0, 0], [22, 24]]], 'add': [[[4, 6], [5, 6], [0, 0]], [[7, 9], [0, 0], [22, 24]]], 'mul': [[[3, 8], [5, 6], [1, 1]], [[7, 9], [1, 1], [120, 11 * 13]]], 'mean': [[[2, 3], [5, 6], [0, 0]], [[7, 9], [0, 0], [11, 12]]], 'min': [[[1, 2], [5, 6], [0, 0]], [[7, 9], [0, 0], [10, 11]]], 'arg_min': [[[0, 0], [1, 1], [3, 3]], [[1, 1], [3, 3], [0, 0]]], 'max': [[[3, 4], [5, 6], [0, 0]], [[7, 9], [0, 0], [12, 13]]], 'arg_max': [[[2, 2], [1, 1], [3, 3]], [[1, 1], [3, 3], [2, 2]]], }, { 'src': [[1, 3], [2, 4]], 'index': [[0, 0], [0, 0]], 'dim': 1, 'sum': [[4], [6]], 'add': [[4], [6]], 'mul': [[3], [8]], 'mean': [[2], [3]], 'min': [[1], [2]], 'arg_min': [[0], [0]], 'max': [[3], [4]], 'arg_max': [[1], [1]], }, { 'src': [[[1, 1], [3, 3]], [[2, 2], [4, 4]]], 'index': [[0, 0], [0, 0]], 'dim': 1, 'sum': [[[4, 4]], [[6, 6]]], 'add': [[[4, 4]], [[6, 6]]], 'mul': [[[3, 3]], [[8, 8]]], 'mean': [[[2, 2]], [[3, 3]]], 'min': [[[1, 1]], [[2, 2]]], 'arg_min': [[[0, 0]], [[0, 0]]], 'max': [[[3, 3]], [[4, 4]]], 'arg_max': [[[1, 1]], [[1, 1]]], }, ] @pytest.mark.parametrize('test,reduce,dtype,device', product(tests, reductions, dtypes, devices)) def test_forward(test, reduce, dtype, device): src = tensor(test['src'], dtype, device) index = tensor(test['index'], torch.long, device) dim = test['dim'] expected = tensor(test[reduce], dtype, device) fn = getattr(torch_scatter, 'scatter_' + reduce) jit = torch.jit.script(fn) out1 = fn(src, index, dim) out2 = jit(src, index, dim) if isinstance(out1, tuple): out1, arg_out1 = out1 out2, arg_out2 = out2 arg_expected = tensor(test['arg_' + reduce], torch.long, device) assert torch.all(arg_out1 == arg_expected) assert arg_out1.tolist() == arg_out1.tolist() assert torch.all(out1 == expected) assert out1.tolist() == out2.tolist() @pytest.mark.parametrize('test,reduce,device', product(tests, reductions, devices)) def test_backward(test, reduce, device): src = tensor(test['src'], torch.double, device) src.requires_grad_() index = tensor(test['index'], torch.long, device) dim = test['dim'] assert gradcheck(torch_scatter.scatter, (src, index, dim, None, None, reduce)) @pytest.mark.parametrize('test,reduce,dtype,device', product(tests, reductions, dtypes, devices)) def test_out(test, reduce, dtype, device): src = tensor(test['src'], dtype, device) index = tensor(test['index'], torch.long, device) dim = test['dim'] expected = tensor(test[reduce], dtype, device) out = torch.full_like(expected, -2) getattr(torch_scatter, 'scatter_' + reduce)(src, index, dim, out) if reduce == 'sum' or reduce == 'add': expected = expected - 2 elif reduce == 'mul': expected = out # We can not really test this here. elif reduce == 'mean': expected = out # We can not really test this here. elif reduce == 'min': expected = expected.fill_(-2) elif reduce == 'max': expected[expected == 0] = -2 else: raise ValueError assert torch.all(out == expected) @pytest.mark.parametrize('test,reduce,dtype,device', product(tests, reductions, dtypes, devices)) def test_non_contiguous(test, reduce, dtype, device): src = tensor(test['src'], dtype, device) index = tensor(test['index'], torch.long, device) dim = test['dim'] expected = tensor(test[reduce], dtype, device) if src.dim() > 1: src = src.transpose(0, 1).contiguous().transpose(0, 1) if index.dim() > 1: index = index.transpose(0, 1).contiguous().transpose(0, 1) out = getattr(torch_scatter, 'scatter_' + reduce)(src, index, dim) if isinstance(out, tuple): out, arg_out = out arg_expected = tensor(test['arg_' + reduce], torch.long, device) assert torch.all(arg_out == arg_expected) assert torch.all(out == expected) pytorch_scatter-2.1.2/test/test_segment.py000066400000000000000000000157051450760104200207720ustar00rootroot00000000000000from itertools import product import pytest import torch import torch_scatter from torch.autograd import gradcheck from torch_scatter.testing import devices, dtypes, reductions, tensor tests = [ { 'src': [1, 2, 3, 4, 5, 6], 'index': [0, 0, 1, 1, 1, 3], 'indptr': [0, 2, 5, 5, 6], 'sum': [3, 12, 0, 6], 'add': [3, 12, 0, 6], 'mean': [1.5, 4, 0, 6], 'min': [1, 3, 0, 6], 'arg_min': [0, 2, 6, 5], 'max': [2, 5, 0, 6], 'arg_max': [1, 4, 6, 5], }, { 'src': [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]], 'index': [0, 0, 1, 1, 1, 3], 'indptr': [0, 2, 5, 5, 6], 'sum': [[4, 6], [21, 24], [0, 0], [11, 12]], 'add': [[4, 6], [21, 24], [0, 0], [11, 12]], 'mean': [[2, 3], [7, 8], [0, 0], [11, 12]], 'min': [[1, 2], [5, 6], [0, 0], [11, 12]], 'arg_min': [[0, 0], [2, 2], [6, 6], [5, 5]], 'max': [[3, 4], [9, 10], [0, 0], [11, 12]], 'arg_max': [[1, 1], [4, 4], [6, 6], [5, 5]], }, { 'src': [[1, 3, 5, 7, 9, 11], [2, 4, 6, 8, 10, 12]], 'index': [[0, 0, 1, 1, 1, 3], [0, 0, 0, 1, 1, 2]], 'indptr': [[0, 2, 5, 5, 6], [0, 3, 5, 6, 6]], 'sum': [[4, 21, 0, 11], [12, 18, 12, 0]], 'add': [[4, 21, 0, 11], [12, 18, 12, 0]], 'mean': [[2, 7, 0, 11], [4, 9, 12, 0]], 'min': [[1, 5, 0, 11], [2, 8, 12, 0]], 'arg_min': [[0, 2, 6, 5], [0, 3, 5, 6]], 'max': [[3, 9, 0, 11], [6, 10, 12, 0]], 'arg_max': [[1, 4, 6, 5], [2, 4, 5, 6]], }, { 'src': [[[1, 2], [3, 4], [5, 6]], [[7, 9], [10, 11], [12, 13]]], 'index': [[0, 0, 1], [0, 2, 2]], 'indptr': [[0, 2, 3, 3], [0, 1, 1, 3]], 'sum': [[[4, 6], [5, 6], [0, 0]], [[7, 9], [0, 0], [22, 24]]], 'add': [[[4, 6], [5, 6], [0, 0]], [[7, 9], [0, 0], [22, 24]]], 'mean': [[[2, 3], [5, 6], [0, 0]], [[7, 9], [0, 0], [11, 12]]], 'min': [[[1, 2], [5, 6], [0, 0]], [[7, 9], [0, 0], [10, 11]]], 'arg_min': [[[0, 0], [2, 2], [3, 3]], [[0, 0], [3, 3], [1, 1]]], 'max': [[[3, 4], [5, 6], [0, 0]], [[7, 9], [0, 0], [12, 13]]], 'arg_max': [[[1, 1], [2, 2], [3, 3]], [[0, 0], [3, 3], [2, 2]]], }, { 'src': [[1, 3], [2, 4]], 'index': [[0, 0], [0, 0]], 'indptr': [[0, 2], [0, 2]], 'sum': [[4], [6]], 'add': [[4], [6]], 'mean': [[2], [3]], 'min': [[1], [2]], 'arg_min': [[0], [0]], 'max': [[3], [4]], 'arg_max': [[1], [1]], }, { 'src': [[[1, 1], [3, 3]], [[2, 2], [4, 4]]], 'index': [[0, 0], [0, 0]], 'indptr': [[0, 2], [0, 2]], 'sum': [[[4, 4]], [[6, 6]]], 'add': [[[4, 4]], [[6, 6]]], 'mean': [[[2, 2]], [[3, 3]]], 'min': [[[1, 1]], [[2, 2]]], 'arg_min': [[[0, 0]], [[0, 0]]], 'max': [[[3, 3]], [[4, 4]]], 'arg_max': [[[1, 1]], [[1, 1]]], }, ] @pytest.mark.parametrize('test,reduce,dtype,device', product(tests, reductions, dtypes, devices)) def test_forward(test, reduce, dtype, device): src = tensor(test['src'], dtype, device) index = tensor(test['index'], torch.long, device) indptr = tensor(test['indptr'], torch.long, device) expected = tensor(test[reduce], dtype, device) fn = getattr(torch_scatter, 'segment_' + reduce + '_csr') jit = torch.jit.script(fn) out1 = fn(src, indptr) out2 = jit(src, indptr) if isinstance(out1, tuple): out1, arg_out1 = out1 out2, arg_out2 = out2 arg_expected = tensor(test['arg_' + reduce], torch.long, device) assert torch.all(arg_out1 == arg_expected) assert arg_out1.tolist() == arg_out2.tolist() assert torch.all(out1 == expected) assert out1.tolist() == out2.tolist() fn = getattr(torch_scatter, 'segment_' + reduce + '_coo') jit = torch.jit.script(fn) out1 = fn(src, index) out2 = jit(src, index) if isinstance(out1, tuple): out1, arg_out1 = out1 out2, arg_out2 = out2 arg_expected = tensor(test['arg_' + reduce], torch.long, device) assert torch.all(arg_out1 == arg_expected) assert arg_out1.tolist() == arg_out2.tolist() assert torch.all(out1 == expected) assert out1.tolist() == out2.tolist() @pytest.mark.parametrize('test,reduce,device', product(tests, reductions, devices)) def test_backward(test, reduce, device): src = tensor(test['src'], torch.double, device) src.requires_grad_() index = tensor(test['index'], torch.long, device) indptr = tensor(test['indptr'], torch.long, device) assert gradcheck(torch_scatter.segment_csr, (src, indptr, None, reduce)) assert gradcheck(torch_scatter.segment_coo, (src, index, None, None, reduce)) @pytest.mark.parametrize('test,reduce,dtype,device', product(tests, reductions, dtypes, devices)) def test_out(test, reduce, dtype, device): src = tensor(test['src'], dtype, device) index = tensor(test['index'], torch.long, device) indptr = tensor(test['indptr'], torch.long, device) expected = tensor(test[reduce], dtype, device) out = torch.full_like(expected, -2) getattr(torch_scatter, 'segment_' + reduce + '_csr')(src, indptr, out) assert torch.all(out == expected) out.fill_(-2) getattr(torch_scatter, 'segment_' + reduce + '_coo')(src, index, out) if reduce == 'sum' or reduce == 'add': expected = expected - 2 elif reduce == 'mean': expected = out # We can not really test this here. elif reduce == 'min': expected = expected.fill_(-2) elif reduce == 'max': expected[expected == 0] = -2 else: raise ValueError assert torch.all(out == expected) @pytest.mark.parametrize('test,reduce,dtype,device', product(tests, reductions, dtypes, devices)) def test_non_contiguous(test, reduce, dtype, device): src = tensor(test['src'], dtype, device) index = tensor(test['index'], torch.long, device) indptr = tensor(test['indptr'], torch.long, device) expected = tensor(test[reduce], dtype, device) if src.dim() > 1: src = src.transpose(0, 1).contiguous().transpose(0, 1) if index.dim() > 1: index = index.transpose(0, 1).contiguous().transpose(0, 1) if indptr.dim() > 1: indptr = indptr.transpose(0, 1).contiguous().transpose(0, 1) out = getattr(torch_scatter, 'segment_' + reduce + '_csr')(src, indptr) if isinstance(out, tuple): out, arg_out = out arg_expected = tensor(test['arg_' + reduce], torch.long, device) assert torch.all(arg_out == arg_expected) assert torch.all(out == expected) out = getattr(torch_scatter, 'segment_' + reduce + '_coo')(src, index) if isinstance(out, tuple): out, arg_out = out arg_expected = tensor(test['arg_' + reduce], torch.long, device) assert torch.all(arg_out == arg_expected) assert torch.all(out == expected) pytorch_scatter-2.1.2/test/test_zero_tensors.py000066400000000000000000000023171450760104200220570ustar00rootroot00000000000000from itertools import product import pytest import torch from torch_scatter import (gather_coo, gather_csr, scatter, segment_coo, segment_csr) from torch_scatter.testing import devices, grad_dtypes, reductions, tensor @pytest.mark.parametrize('reduce,dtype,device', product(reductions, grad_dtypes, devices)) def test_zero_elements(reduce, dtype, device): x = torch.randn(0, 0, 0, 16, dtype=dtype, device=device, requires_grad=True) index = tensor([], torch.long, device) indptr = tensor([], torch.long, device) out = scatter(x, index, dim=0, dim_size=0, reduce=reduce) out.backward(torch.randn_like(out)) assert out.size() == (0, 0, 0, 16) out = segment_coo(x, index, dim_size=0, reduce=reduce) out.backward(torch.randn_like(out)) assert out.size() == (0, 0, 0, 16) out = gather_coo(x, index) out.backward(torch.randn_like(out)) assert out.size() == (0, 0, 0, 16) out = segment_csr(x, indptr, reduce=reduce) out.backward(torch.randn_like(out)) assert out.size() == (0, 0, 0, 16) out = gather_csr(x, indptr) out.backward(torch.randn_like(out)) assert out.size() == (0, 0, 0, 16) pytorch_scatter-2.1.2/torch_scatter/000077500000000000000000000000001450760104200175745ustar00rootroot00000000000000pytorch_scatter-2.1.2/torch_scatter/__init__.py000066400000000000000000000104451450760104200217110ustar00rootroot00000000000000import importlib import os import os.path as osp import torch __version__ = '2.1.2' for library in ['_version', '_scatter', '_segment_csr', '_segment_coo']: cuda_spec = importlib.machinery.PathFinder().find_spec( f'{library}_cuda', [osp.dirname(__file__)]) cpu_spec = importlib.machinery.PathFinder().find_spec( f'{library}_cpu', [osp.dirname(__file__)]) spec = cuda_spec or cpu_spec if spec is not None: torch.ops.load_library(spec.origin) elif os.getenv('BUILD_DOCS', '0') != '1': # pragma: no cover raise ImportError(f"Could not find module '{library}_cpu' in " f"{osp.dirname(__file__)}") else: # pragma: no cover from .placeholder import cuda_version_placeholder torch.ops.torch_scatter.cuda_version = cuda_version_placeholder from .placeholder import scatter_placeholder torch.ops.torch_scatter.scatter_mul = scatter_placeholder from .placeholder import scatter_arg_placeholder torch.ops.torch_scatter.scatter_min = scatter_arg_placeholder torch.ops.torch_scatter.scatter_max = scatter_arg_placeholder from .placeholder import (gather_csr_placeholder, segment_csr_arg_placeholder, segment_csr_placeholder) torch.ops.torch_scatter.segment_sum_csr = segment_csr_placeholder torch.ops.torch_scatter.segment_mean_csr = segment_csr_placeholder torch.ops.torch_scatter.segment_min_csr = segment_csr_arg_placeholder torch.ops.torch_scatter.segment_max_csr = segment_csr_arg_placeholder torch.ops.torch_scatter.gather_csr = gather_csr_placeholder from .placeholder import (gather_coo_placeholder, segment_coo_arg_placeholder, segment_coo_placeholder) torch.ops.torch_scatter.segment_sum_coo = segment_coo_placeholder torch.ops.torch_scatter.segment_mean_coo = segment_coo_placeholder torch.ops.torch_scatter.segment_min_coo = segment_coo_arg_placeholder torch.ops.torch_scatter.segment_max_coo = segment_coo_arg_placeholder torch.ops.torch_scatter.gather_coo = gather_coo_placeholder cuda_version = torch.ops.torch_scatter.cuda_version() is_not_hip = torch.version.hip is None is_cuda = torch.version.cuda is not None if is_not_hip and is_cuda and cuda_version != -1: # pragma: no cover if cuda_version < 10000: major, minor = int(str(cuda_version)[0]), int(str(cuda_version)[2]) else: major, minor = int(str(cuda_version)[0:2]), int(str(cuda_version)[3]) t_major, t_minor = [int(x) for x in torch.version.cuda.split('.')] if t_major != major: raise RuntimeError( f'Detected that PyTorch and torch_scatter were compiled with ' f'different CUDA versions. PyTorch has CUDA version ' f'{t_major}.{t_minor} and torch_scatter has CUDA version ' f'{major}.{minor}. Please reinstall the torch_scatter that ' f'matches your PyTorch install.') from .scatter import scatter_sum, scatter_add, scatter_mul # noqa from .scatter import scatter_mean, scatter_min, scatter_max, scatter # noqa from .segment_csr import segment_sum_csr, segment_add_csr # noqa from .segment_csr import segment_mean_csr, segment_min_csr # noqa from .segment_csr import segment_max_csr, segment_csr, gather_csr # noqa from .segment_coo import segment_sum_coo, segment_add_coo # noqa from .segment_coo import segment_mean_coo, segment_min_coo # noqa from .segment_coo import segment_max_coo, segment_coo, gather_coo # noqa from .composite import scatter_std, scatter_logsumexp # noqa from .composite import scatter_softmax, scatter_log_softmax # noqa __all__ = [ 'scatter_sum', 'scatter_add', 'scatter_mul', 'scatter_mean', 'scatter_min', 'scatter_max', 'scatter', 'segment_sum_csr', 'segment_add_csr', 'segment_mean_csr', 'segment_min_csr', 'segment_max_csr', 'segment_csr', 'gather_csr', 'segment_sum_coo', 'segment_add_coo', 'segment_mean_coo', 'segment_min_coo', 'segment_max_coo', 'segment_coo', 'gather_coo', 'scatter_std', 'scatter_logsumexp', 'scatter_softmax', 'scatter_log_softmax', 'torch_scatter', '__version__', ] pytorch_scatter-2.1.2/torch_scatter/composite/000077500000000000000000000000001450760104200215765ustar00rootroot00000000000000pytorch_scatter-2.1.2/torch_scatter/composite/__init__.py000066400000000000000000000003551450760104200237120ustar00rootroot00000000000000from .std import scatter_std from .logsumexp import scatter_logsumexp from .softmax import scatter_log_softmax, scatter_softmax __all__ = [ 'scatter_std', 'scatter_logsumexp', 'scatter_softmax', 'scatter_log_softmax', ] pytorch_scatter-2.1.2/torch_scatter/composite/logsumexp.py000066400000000000000000000027411450760104200241770ustar00rootroot00000000000000from typing import Optional import torch from torch_scatter import scatter_sum, scatter_max from torch_scatter.utils import broadcast def scatter_logsumexp(src: torch.Tensor, index: torch.Tensor, dim: int = -1, out: Optional[torch.Tensor] = None, dim_size: Optional[int] = None, eps: float = 1e-12) -> torch.Tensor: if not torch.is_floating_point(src): raise ValueError('`scatter_logsumexp` can only be computed over ' 'tensors with floating point data types.') index = broadcast(index, src, dim) if out is not None: dim_size = out.size(dim) else: if dim_size is None: dim_size = int(index.max()) + 1 size = list(src.size()) size[dim] = dim_size max_value_per_index = torch.full(size, float('-inf'), dtype=src.dtype, device=src.device) scatter_max(src, index, dim, max_value_per_index, dim_size=dim_size)[0] max_per_src_element = max_value_per_index.gather(dim, index) recentered_score = src - max_per_src_element recentered_score.masked_fill_(torch.isnan(recentered_score), float('-inf')) if out is not None: out = out.sub_(max_value_per_index).exp_() sum_per_index = scatter_sum(recentered_score.exp_(), index, dim, out, dim_size) out = sum_per_index.add_(eps).log_().add_(max_value_per_index) return out.nan_to_num_(neginf=0.0) pytorch_scatter-2.1.2/torch_scatter/composite/softmax.py000066400000000000000000000035111450760104200236310ustar00rootroot00000000000000from typing import Optional import torch from torch_scatter import scatter_sum, scatter_max from torch_scatter.utils import broadcast def scatter_softmax(src: torch.Tensor, index: torch.Tensor, dim: int = -1, dim_size: Optional[int] = None) -> torch.Tensor: if not torch.is_floating_point(src): raise ValueError('`scatter_softmax` can only be computed over tensors ' 'with floating point data types.') index = broadcast(index, src, dim) max_value_per_index = scatter_max( src, index, dim=dim, dim_size=dim_size)[0] max_per_src_element = max_value_per_index.gather(dim, index) recentered_scores = src - max_per_src_element recentered_scores_exp = recentered_scores.exp_() sum_per_index = scatter_sum( recentered_scores_exp, index, dim, dim_size=dim_size) normalizing_constants = sum_per_index.gather(dim, index) return recentered_scores_exp.div(normalizing_constants) def scatter_log_softmax(src: torch.Tensor, index: torch.Tensor, dim: int = -1, eps: float = 1e-12, dim_size: Optional[int] = None) -> torch.Tensor: if not torch.is_floating_point(src): raise ValueError('`scatter_log_softmax` can only be computed over ' 'tensors with floating point data types.') index = broadcast(index, src, dim) max_value_per_index = scatter_max( src, index, dim=dim, dim_size=dim_size)[0] max_per_src_element = max_value_per_index.gather(dim, index) recentered_scores = src - max_per_src_element sum_per_index = scatter_sum( recentered_scores.exp(), index, dim, dim_size=dim_size) normalizing_constants = sum_per_index.add_(eps).log_().gather(dim, index) return recentered_scores.sub_(normalizing_constants) pytorch_scatter-2.1.2/torch_scatter/composite/std.py000066400000000000000000000020711450760104200227420ustar00rootroot00000000000000from typing import Optional import torch from torch_scatter import scatter_sum from torch_scatter.utils import broadcast def scatter_std(src: torch.Tensor, index: torch.Tensor, dim: int = -1, out: Optional[torch.Tensor] = None, dim_size: Optional[int] = None, unbiased: bool = True) -> torch.Tensor: if out is not None: dim_size = out.size(dim) if dim < 0: dim = src.dim() + dim count_dim = dim if index.dim() <= dim: count_dim = index.dim() - 1 ones = torch.ones(index.size(), dtype=src.dtype, device=src.device) count = scatter_sum(ones, index, count_dim, dim_size=dim_size) index = broadcast(index, src, dim) tmp = scatter_sum(src, index, dim, dim_size=dim_size) count = broadcast(count, tmp, dim).clamp(1) mean = tmp.div(count) var = (src - mean.gather(dim, index)) var = var * var out = scatter_sum(var, index, dim, out, dim_size) if unbiased: count = count.sub(1).clamp_(1) out = out.div(count + 1e-6).sqrt() return out pytorch_scatter-2.1.2/torch_scatter/placeholder.py000066400000000000000000000036631450760104200224400ustar00rootroot00000000000000from typing import Optional, Tuple import torch def cuda_version_placeholder() -> int: return -1 def scatter_placeholder(src: torch.Tensor, index: torch.Tensor, dim: int, out: Optional[torch.Tensor], dim_size: Optional[int]) -> torch.Tensor: raise ImportError return src def scatter_arg_placeholder(src: torch.Tensor, index: torch.Tensor, dim: int, out: Optional[torch.Tensor], dim_size: Optional[int] ) -> Tuple[torch.Tensor, torch.Tensor]: raise ImportError return src, index def segment_csr_placeholder(src: torch.Tensor, indptr: torch.Tensor, out: Optional[torch.Tensor]) -> torch.Tensor: raise ImportError return src def segment_csr_arg_placeholder(src: torch.Tensor, indptr: torch.Tensor, out: Optional[torch.Tensor] ) -> Tuple[torch.Tensor, torch.Tensor]: raise ImportError return src, indptr def gather_csr_placeholder(src: torch.Tensor, indptr: torch.Tensor, out: Optional[torch.Tensor]) -> torch.Tensor: raise ImportError return src def segment_coo_placeholder(src: torch.Tensor, index: torch.Tensor, out: Optional[torch.Tensor], dim_size: Optional[int]) -> torch.Tensor: raise ImportError return src def segment_coo_arg_placeholder(src: torch.Tensor, index: torch.Tensor, out: Optional[torch.Tensor], dim_size: Optional[int] ) -> Tuple[torch.Tensor, torch.Tensor]: raise ImportError return src, index def gather_coo_placeholder(src: torch.Tensor, index: torch.Tensor, out: Optional[torch.Tensor]) -> torch.Tensor: raise ImportError return src pytorch_scatter-2.1.2/torch_scatter/scatter.py000066400000000000000000000136041450760104200216170ustar00rootroot00000000000000from typing import Optional, Tuple import torch from .utils import broadcast def scatter_sum(src: torch.Tensor, index: torch.Tensor, dim: int = -1, out: Optional[torch.Tensor] = None, dim_size: Optional[int] = None) -> torch.Tensor: index = broadcast(index, src, dim) if out is None: size = list(src.size()) if dim_size is not None: size[dim] = dim_size elif index.numel() == 0: size[dim] = 0 else: size[dim] = int(index.max()) + 1 out = torch.zeros(size, dtype=src.dtype, device=src.device) return out.scatter_add_(dim, index, src) else: return out.scatter_add_(dim, index, src) def scatter_add(src: torch.Tensor, index: torch.Tensor, dim: int = -1, out: Optional[torch.Tensor] = None, dim_size: Optional[int] = None) -> torch.Tensor: return scatter_sum(src, index, dim, out, dim_size) def scatter_mul(src: torch.Tensor, index: torch.Tensor, dim: int = -1, out: Optional[torch.Tensor] = None, dim_size: Optional[int] = None) -> torch.Tensor: return torch.ops.torch_scatter.scatter_mul(src, index, dim, out, dim_size) def scatter_mean(src: torch.Tensor, index: torch.Tensor, dim: int = -1, out: Optional[torch.Tensor] = None, dim_size: Optional[int] = None) -> torch.Tensor: out = scatter_sum(src, index, dim, out, dim_size) dim_size = out.size(dim) index_dim = dim if index_dim < 0: index_dim = index_dim + src.dim() if index.dim() <= index_dim: index_dim = index.dim() - 1 ones = torch.ones(index.size(), dtype=src.dtype, device=src.device) count = scatter_sum(ones, index, index_dim, None, dim_size) count[count < 1] = 1 count = broadcast(count, out, dim) if out.is_floating_point(): out.true_divide_(count) else: out.div_(count, rounding_mode='floor') return out def scatter_min( src: torch.Tensor, index: torch.Tensor, dim: int = -1, out: Optional[torch.Tensor] = None, dim_size: Optional[int] = None) -> Tuple[torch.Tensor, torch.Tensor]: return torch.ops.torch_scatter.scatter_min(src, index, dim, out, dim_size) def scatter_max( src: torch.Tensor, index: torch.Tensor, dim: int = -1, out: Optional[torch.Tensor] = None, dim_size: Optional[int] = None) -> Tuple[torch.Tensor, torch.Tensor]: return torch.ops.torch_scatter.scatter_max(src, index, dim, out, dim_size) def scatter(src: torch.Tensor, index: torch.Tensor, dim: int = -1, out: Optional[torch.Tensor] = None, dim_size: Optional[int] = None, reduce: str = "sum") -> torch.Tensor: r""" | .. image:: https://raw.githubusercontent.com/rusty1s/pytorch_scatter/ master/docs/source/_figures/add.svg?sanitize=true :align: center :width: 400px | Reduces all values from the :attr:`src` tensor into :attr:`out` at the indices specified in the :attr:`index` tensor along a given axis :attr:`dim`. For each value in :attr:`src`, its output index is specified by its index in :attr:`src` for dimensions outside of :attr:`dim` and by the corresponding value in :attr:`index` for dimension :attr:`dim`. The applied reduction is defined via the :attr:`reduce` argument. Formally, if :attr:`src` and :attr:`index` are :math:`n`-dimensional tensors with size :math:`(x_0, ..., x_{i-1}, x_i, x_{i+1}, ..., x_{n-1})` and :attr:`dim` = `i`, then :attr:`out` must be an :math:`n`-dimensional tensor with size :math:`(x_0, ..., x_{i-1}, y, x_{i+1}, ..., x_{n-1})`. Moreover, the values of :attr:`index` must be between :math:`0` and :math:`y - 1`, although no specific ordering of indices is required. The :attr:`index` tensor supports broadcasting in case its dimensions do not match with :attr:`src`. For one-dimensional tensors with :obj:`reduce="sum"`, the operation computes .. math:: \mathrm{out}_i = \mathrm{out}_i + \sum_j~\mathrm{src}_j where :math:`\sum_j` is over :math:`j` such that :math:`\mathrm{index}_j = i`. .. note:: This operation is implemented via atomic operations on the GPU and is therefore **non-deterministic** since the order of parallel operations to the same value is undetermined. For floating-point variables, this results in a source of variance in the result. :param src: The source tensor. :param index: The indices of elements to scatter. :param dim: The axis along which to index. (default: :obj:`-1`) :param out: The destination tensor. :param dim_size: If :attr:`out` is not given, automatically create output with size :attr:`dim_size` at dimension :attr:`dim`. If :attr:`dim_size` is not given, a minimal sized output tensor according to :obj:`index.max() + 1` is returned. :param reduce: The reduce operation (:obj:`"sum"`, :obj:`"mul"`, :obj:`"mean"`, :obj:`"min"` or :obj:`"max"`). (default: :obj:`"sum"`) :rtype: :class:`Tensor` .. code-block:: python from torch_scatter import scatter src = torch.randn(10, 6, 64) index = torch.tensor([0, 1, 0, 1, 2, 1]) # Broadcasting in the first and last dim. out = scatter(src, index, dim=1, reduce="sum") print(out.size()) .. code-block:: torch.Size([10, 3, 64]) """ if reduce == 'sum' or reduce == 'add': return scatter_sum(src, index, dim, out, dim_size) if reduce == 'mul': return scatter_mul(src, index, dim, out, dim_size) elif reduce == 'mean': return scatter_mean(src, index, dim, out, dim_size) elif reduce == 'min': return scatter_min(src, index, dim, out, dim_size)[0] elif reduce == 'max': return scatter_max(src, index, dim, out, dim_size)[0] else: raise ValueError pytorch_scatter-2.1.2/torch_scatter/segment_coo.py000066400000000000000000000123661450760104200224600ustar00rootroot00000000000000from typing import Optional, Tuple import torch def segment_sum_coo(src: torch.Tensor, index: torch.Tensor, out: Optional[torch.Tensor] = None, dim_size: Optional[int] = None) -> torch.Tensor: return torch.ops.torch_scatter.segment_sum_coo(src, index, out, dim_size) def segment_add_coo(src: torch.Tensor, index: torch.Tensor, out: Optional[torch.Tensor] = None, dim_size: Optional[int] = None) -> torch.Tensor: return torch.ops.torch_scatter.segment_sum_coo(src, index, out, dim_size) def segment_mean_coo(src: torch.Tensor, index: torch.Tensor, out: Optional[torch.Tensor] = None, dim_size: Optional[int] = None) -> torch.Tensor: return torch.ops.torch_scatter.segment_mean_coo(src, index, out, dim_size) def segment_min_coo( src: torch.Tensor, index: torch.Tensor, out: Optional[torch.Tensor] = None, dim_size: Optional[int] = None) -> Tuple[torch.Tensor, torch.Tensor]: return torch.ops.torch_scatter.segment_min_coo(src, index, out, dim_size) def segment_max_coo( src: torch.Tensor, index: torch.Tensor, out: Optional[torch.Tensor] = None, dim_size: Optional[int] = None) -> Tuple[torch.Tensor, torch.Tensor]: return torch.ops.torch_scatter.segment_max_coo(src, index, out, dim_size) def segment_coo(src: torch.Tensor, index: torch.Tensor, out: Optional[torch.Tensor] = None, dim_size: Optional[int] = None, reduce: str = "sum") -> torch.Tensor: r""" | .. image:: https://raw.githubusercontent.com/rusty1s/pytorch_scatter/ master/docs/source/_figures/segment_coo.svg?sanitize=true :align: center :width: 400px | Reduces all values from the :attr:`src` tensor into :attr:`out` at the indices specified in the :attr:`index` tensor along the last dimension of :attr:`index`. For each value in :attr:`src`, its output index is specified by its index in :attr:`src` for dimensions outside of :obj:`index.dim() - 1` and by the corresponding value in :attr:`index` for dimension :obj:`index.dim() - 1`. The applied reduction is defined via the :attr:`reduce` argument. Formally, if :attr:`src` and :attr:`index` are :math:`n`-dimensional and :math:`m`-dimensional tensors with size :math:`(x_0, ..., x_{m-1}, x_m, x_{m+1}, ..., x_{n-1})` and :math:`(x_0, ..., x_{m-1}, x_m)`, respectively, then :attr:`out` must be an :math:`n`-dimensional tensor with size :math:`(x_0, ..., x_{m-1}, y, x_{m+1}, ..., x_{n-1})`. Moreover, the values of :attr:`index` must be between :math:`0` and :math:`y - 1` in ascending order. The :attr:`index` tensor supports broadcasting in case its dimensions do not match with :attr:`src`. For one-dimensional tensors with :obj:`reduce="sum"`, the operation computes .. math:: \mathrm{out}_i = \mathrm{out}_i + \sum_j~\mathrm{src}_j where :math:`\sum_j` is over :math:`j` such that :math:`\mathrm{index}_j = i`. In contrast to :meth:`scatter`, this method expects values in :attr:`index` **to be sorted** along dimension :obj:`index.dim() - 1`. Due to the use of sorted indices, :meth:`segment_coo` is usually faster than the more general :meth:`scatter` operation. .. note:: This operation is implemented via atomic operations on the GPU and is therefore **non-deterministic** since the order of parallel operations to the same value is undetermined. For floating-point variables, this results in a source of variance in the result. :param src: The source tensor. :param index: The sorted indices of elements to segment. The number of dimensions of :attr:`index` needs to be less than or equal to :attr:`src`. :param out: The destination tensor. :param dim_size: If :attr:`out` is not given, automatically create output with size :attr:`dim_size` at dimension :obj:`index.dim() - 1`. If :attr:`dim_size` is not given, a minimal sized output tensor according to :obj:`index.max() + 1` is returned. :param reduce: The reduce operation (:obj:`"sum"`, :obj:`"mean"`, :obj:`"min"` or :obj:`"max"`). (default: :obj:`"sum"`) :rtype: :class:`Tensor` .. code-block:: python from torch_scatter import segment_coo src = torch.randn(10, 6, 64) index = torch.tensor([0, 0, 1, 1, 1, 2]) index = index.view(1, -1) # Broadcasting in the first and last dim. out = segment_coo(src, index, reduce="sum") print(out.size()) .. code-block:: torch.Size([10, 3, 64]) """ if reduce == 'sum' or reduce == 'add': return segment_sum_coo(src, index, out, dim_size) elif reduce == 'mean': return segment_mean_coo(src, index, out, dim_size) elif reduce == 'min': return segment_min_coo(src, index, out, dim_size)[0] elif reduce == 'max': return segment_max_coo(src, index, out, dim_size)[0] else: raise ValueError def gather_coo(src: torch.Tensor, index: torch.Tensor, out: Optional[torch.Tensor] = None) -> torch.Tensor: return torch.ops.torch_scatter.gather_coo(src, index, out) pytorch_scatter-2.1.2/torch_scatter/segment_csr.py000066400000000000000000000101101450760104200224500ustar00rootroot00000000000000from typing import Optional, Tuple import torch def segment_sum_csr(src: torch.Tensor, indptr: torch.Tensor, out: Optional[torch.Tensor] = None) -> torch.Tensor: return torch.ops.torch_scatter.segment_sum_csr(src, indptr, out) def segment_add_csr(src: torch.Tensor, indptr: torch.Tensor, out: Optional[torch.Tensor] = None) -> torch.Tensor: return torch.ops.torch_scatter.segment_sum_csr(src, indptr, out) def segment_mean_csr(src: torch.Tensor, indptr: torch.Tensor, out: Optional[torch.Tensor] = None) -> torch.Tensor: return torch.ops.torch_scatter.segment_mean_csr(src, indptr, out) def segment_min_csr( src: torch.Tensor, indptr: torch.Tensor, out: Optional[torch.Tensor] = None ) -> Tuple[torch.Tensor, torch.Tensor]: return torch.ops.torch_scatter.segment_min_csr(src, indptr, out) def segment_max_csr( src: torch.Tensor, indptr: torch.Tensor, out: Optional[torch.Tensor] = None ) -> Tuple[torch.Tensor, torch.Tensor]: return torch.ops.torch_scatter.segment_max_csr(src, indptr, out) def segment_csr(src: torch.Tensor, indptr: torch.Tensor, out: Optional[torch.Tensor] = None, reduce: str = "sum") -> torch.Tensor: r""" Reduces all values from the :attr:`src` tensor into :attr:`out` within the ranges specified in the :attr:`indptr` tensor along the last dimension of :attr:`indptr`. For each value in :attr:`src`, its output index is specified by its index in :attr:`src` for dimensions outside of :obj:`indptr.dim() - 1` and by the corresponding range index in :attr:`indptr` for dimension :obj:`indptr.dim() - 1`. The applied reduction is defined via the :attr:`reduce` argument. Formally, if :attr:`src` and :attr:`indptr` are :math:`n`-dimensional and :math:`m`-dimensional tensors with size :math:`(x_0, ..., x_{m-1}, x_m, x_{m+1}, ..., x_{n-1})` and :math:`(x_0, ..., x_{m-2}, y)`, respectively, then :attr:`out` must be an :math:`n`-dimensional tensor with size :math:`(x_0, ..., x_{m-2}, y - 1, x_{m}, ..., x_{n-1})`. Moreover, the values of :attr:`indptr` must be between :math:`0` and :math:`x_m` in ascending order. The :attr:`indptr` tensor supports broadcasting in case its dimensions do not match with :attr:`src`. For one-dimensional tensors with :obj:`reduce="sum"`, the operation computes .. math:: \mathrm{out}_i = \sum_{j = \mathrm{indptr}[i]}^{\mathrm{indptr}[i+1]-1}~\mathrm{src}_j. Due to the use of index pointers, :meth:`segment_csr` is the fastest method to apply for grouped reductions. .. note:: In contrast to :meth:`scatter()` and :meth:`segment_coo`, this operation is **fully-deterministic**. :param src: The source tensor. :param indptr: The index pointers between elements to segment. The number of dimensions of :attr:`index` needs to be less than or equal to :attr:`src`. :param out: The destination tensor. :param reduce: The reduce operation (:obj:`"sum"`, :obj:`"mean"`, :obj:`"min"` or :obj:`"max"`). (default: :obj:`"sum"`) :rtype: :class:`Tensor` .. code-block:: python from torch_scatter import segment_csr src = torch.randn(10, 6, 64) indptr = torch.tensor([0, 2, 5, 6]) indptr = indptr.view(1, -1) # Broadcasting in the first and last dim. out = segment_csr(src, indptr, reduce="sum") print(out.size()) .. code-block:: torch.Size([10, 3, 64]) """ if reduce == 'sum' or reduce == 'add': return segment_sum_csr(src, indptr, out) elif reduce == 'mean': return segment_mean_csr(src, indptr, out) elif reduce == 'min': return segment_min_csr(src, indptr, out)[0] elif reduce == 'max': return segment_max_csr(src, indptr, out)[0] else: raise ValueError def gather_csr(src: torch.Tensor, indptr: torch.Tensor, out: Optional[torch.Tensor] = None) -> torch.Tensor: return torch.ops.torch_scatter.gather_csr(src, indptr, out) pytorch_scatter-2.1.2/torch_scatter/testing.py000066400000000000000000000007271450760104200216310ustar00rootroot00000000000000from typing import Any import torch reductions = ['sum', 'add', 'mean', 'min', 'max'] dtypes = [ torch.half, torch.bfloat16, torch.float, torch.double, torch.int, torch.long ] grad_dtypes = [torch.float, torch.double] devices = [torch.device('cpu')] if torch.cuda.is_available(): devices += [torch.device('cuda:0')] def tensor(x: Any, dtype: torch.dtype, device: torch.device): return None if x is None else torch.tensor(x, device=device).to(dtype) pytorch_scatter-2.1.2/torch_scatter/utils.py000066400000000000000000000005301450760104200213040ustar00rootroot00000000000000import torch def broadcast(src: torch.Tensor, other: torch.Tensor, dim: int): if dim < 0: dim = other.dim() + dim if src.dim() == 1: for _ in range(0, dim): src = src.unsqueeze(0) for _ in range(src.dim(), other.dim()): src = src.unsqueeze(-1) src = src.expand(other.size()) return src