pax_global_header00006660000000000000000000000064145115162370014517gustar00rootroot0000000000000052 comment=29cd22bf1a5b82fc06b108d6573f81302c5d6b12 pytorch_cluster-1.6.3/000077500000000000000000000000001451151623700147575ustar00rootroot00000000000000pytorch_cluster-1.6.3/.coveragerc000066400000000000000000000001771451151623700171050ustar00rootroot00000000000000[run] source=torch_cluster [report] exclude_lines = pragma: no cover torch.jit.script raise except is_cuda pytorch_cluster-1.6.3/.github/000077500000000000000000000000001451151623700163175ustar00rootroot00000000000000pytorch_cluster-1.6.3/.github/workflows/000077500000000000000000000000001451151623700203545ustar00rootroot00000000000000pytorch_cluster-1.6.3/.github/workflows/building-conda.yml000066400000000000000000000053341451151623700237630ustar00rootroot00000000000000name: Building Conda on: [workflow_dispatch] jobs: conda-build: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: # We have trouble building for Windows - drop for now. 
os: [ubuntu-20.04, macos-11] # windows-2019 python-version: ['3.8', '3.9', '3.10', '3.11'] torch-version: [2.0.0, 2.1.0] cuda-version: ['cpu', 'cu117', 'cu118', 'cu121'] exclude: - torch-version: 2.0.0 cuda-version: 'cu121' - torch-version: 2.1.0 cuda-version: 'cu117' - os: macos-11 cuda-version: 'cu117' - os: macos-11 cuda-version: 'cu118' - os: macos-11 cuda-version: 'cu121' steps: - uses: actions/checkout@v2 - name: Set up Conda for Python ${{ matrix.python-version }} uses: conda-incubator/setup-miniconda@v2 with: python-version: ${{ matrix.python-version }} - name: Free Disk Space (Ubuntu) if: ${{ runner.os == 'Linux' }} uses: jlumbroso/free-disk-space@main - name: Install Conda packages run: | conda install conda-build conda-verify --yes shell: bash -l {0} - name: Install CUDA ${{ matrix.cuda-version }} if: ${{ matrix.cuda-version != 'cpu' }} run: | bash .github/workflows/cuda/${{ matrix.cuda-version }}-${{ runner.os }}.sh shell: bash - name: Build Conda package for CPU if: ${{ matrix.cuda-version == 'cpu' }} run: | FORCE_CUDA=0 TORCH_CUDA_ARCH_LIST=0 ./conda/pytorch-cluster/build_conda.sh ${{ matrix.python-version }} ${{ matrix.torch-version }} ${{ matrix.cuda-version }} shell: bash -l {0} - name: Build Conda package for GPU if: ${{ matrix.cuda-version != 'cpu' }} run: | source .github/workflows/cuda/${{ matrix.cuda-version }}-${{ runner.os }}-env.sh ./conda/pytorch-cluster/build_conda.sh ${{ matrix.python-version }} ${{ matrix.torch-version }} ${{ matrix.cuda-version }} shell: bash -l {0} - name: Publish Conda package on organization channel run: | conda install anaconda-client --yes anaconda upload --force --label main $HOME/conda-bld/*/*.tar.bz2 env: ANACONDA_API_TOKEN: ${{ secrets.PYG_ANACONDA_TOKEN }} shell: bash -l {0} - name: Publish Conda package on personal channel run: | conda install anaconda-client --yes anaconda upload --force --label main $HOME/conda-bld/*/*.tar.bz2 env: ANACONDA_API_TOKEN: ${{ secrets.RUSTY1S_ANACONDA_TOKEN }} shell: bash -l 
{0} pytorch_cluster-1.6.3/.github/workflows/building.yml000066400000000000000000000067321451151623700227040ustar00rootroot00000000000000name: Building Wheels on: [workflow_dispatch] jobs: wheel: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-20.04, macos-11, windows-2019] python-version: ['3.8', '3.9', '3.10', '3.11'] torch-version: [2.0.0, 2.1.0] cuda-version: ['cpu', 'cu117', 'cu118', 'cu121'] exclude: - torch-version: 2.0.0 cuda-version: 'cu121' - torch-version: 2.1.0 cuda-version: 'cu117' - os: macos-11 cuda-version: 'cu117' - os: macos-11 cuda-version: 'cu118' - os: macos-11 cuda-version: 'cu121' steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Upgrade pip run: | pip install --upgrade setuptools pip install scipy==1.10.1 # Python 3.8 support pip list - name: Free Disk Space (Ubuntu) if: ${{ runner.os == 'Linux' }} uses: jlumbroso/free-disk-space@main - name: Install CUDA ${{ matrix.cuda-version }} if: ${{ matrix.cuda-version != 'cpu' }} run: | bash .github/workflows/cuda/${{ matrix.cuda-version }}-${{ runner.os }}.sh - name: Install PyTorch ${{ matrix.torch-version }}+${{ matrix.cuda-version }} run: | pip install torch==${{ matrix.torch-version }} --extra-index-url https://download.pytorch.org/whl/${{ matrix.cuda-version }} python -c "import torch; print('PyTorch:', torch.__version__)" python -c "import torch; print('CUDA:', torch.version.cuda)" - name: Set version if: ${{ runner.os != 'macOS' }} run: | VERSION=`sed -n "s/^__version__ = '\(.*\)'/\1/p" torch_cluster/__init__.py` TORCH_VERSION=`echo "pt${{ matrix.torch-version }}" | sed "s/..$//" | sed "s/\.//g"` CUDA_VERSION=`echo ${{ matrix.cuda-version }}` echo "New version name: $VERSION+$TORCH_VERSION$CUDA_VERSION" sed -i "s/$VERSION/$VERSION+$TORCH_VERSION$CUDA_VERSION/" setup.py sed -i "s/$VERSION/$VERSION+$TORCH_VERSION$CUDA_VERSION/" 
torch_cluster/__init__.py shell: bash - name: Install main package for CPU if: ${{ matrix.cuda-version == 'cpu' }} run: | FORCE_ONLY_CPU=1 python setup.py develop shell: bash - name: Install main package for GPU if: ${{ matrix.cuda-version != 'cpu' }} run: | source .github/workflows/cuda/${{ matrix.cuda-version }}-${{ runner.os }}-env.sh python setup.py develop shell: bash - name: Test installation run: | python -c "import torch_cluster; print('torch-cluster:', torch_cluster.__version__)" - name: Build wheel run: | pip install wheel python setup.py bdist_wheel --dist-dir=dist - name: Configure AWS uses: aws-actions/configure-aws-credentials@v1 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-region: us-west-1 - name: Upload wheel run: | aws s3 sync dist s3://data.pyg.org/whl/torch-${{ matrix.torch-version }}+${{ matrix.cuda-version }} --grants read=uri=http://acs.amazonaws.com/groups/global/AllUsers pytorch_cluster-1.6.3/.github/workflows/cuda/000077500000000000000000000000001451151623700212705ustar00rootroot00000000000000pytorch_cluster-1.6.3/.github/workflows/cuda/cu101-Linux-env.sh000066400000000000000000000003131451151623700243350ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/usr/local/cuda-10.1 LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} PATH=${CUDA_HOME}/bin:${PATH} export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5" pytorch_cluster-1.6.3/.github/workflows/cuda/cu101-Linux.sh000077500000000000000000000013361451151623700235600ustar00rootroot00000000000000#!/bin/bash OS=ubuntu1804 wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda-repo-${OS}-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb sudo dpkg -i 
cuda-repo-${OS}-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb sudo apt-key add /var/cuda-repo-10-1-local-10.1.243-418.87.00/7fa2af80.pub sudo apt-get -qq update sudo apt install cuda-nvcc-10-1 cuda-libraries-dev-10-1 sudo apt clean rm -f https://developer.download.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda-repo-${OS}-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb pytorch_cluster-1.6.3/.github/workflows/cuda/cu101-Windows-env.sh000066400000000000000000000004341451151623700246740ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v10.1 PATH=${CUDA_HOME}/bin:$PATH PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5" pytorch_cluster-1.6.3/.github/workflows/cuda/cu101-Windows.sh000077500000000000000000000020761451151623700241150ustar00rootroot00000000000000#!/bin/bash # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=10.1 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}/Prod/local_installers/ export CUDA_FILE=cuda_${CUDA_SHORT}.243_426.00_win10.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." 
PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" rm -f "${CUDA_FILE}" pytorch_cluster-1.6.3/.github/workflows/cuda/cu102-Linux-env.sh000066400000000000000000000003131451151623700243360ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/usr/local/cuda-10.2 LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} PATH=${CUDA_HOME}/bin:${PATH} export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5" pytorch_cluster-1.6.3/.github/workflows/cuda/cu102-Linux.sh000077500000000000000000000013321451151623700235550ustar00rootroot00000000000000#!/bin/bash OS=ubuntu1804 wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb sudo dpkg -i cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb sudo apt-key add /var/cuda-repo-10-2-local-10.2.89-440.33.01/7fa2af80.pub sudo apt-get -qq update sudo apt install cuda-nvcc-10-2 cuda-libraries-dev-10-2 sudo apt clean rm -f https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb pytorch_cluster-1.6.3/.github/workflows/cuda/cu102-Windows-env.sh000066400000000000000000000004341451151623700246750ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v10.2 PATH=${CUDA_HOME}/bin:$PATH PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ 
Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5" pytorch_cluster-1.6.3/.github/workflows/cuda/cu102-Windows.sh000077500000000000000000000020741451151623700241140ustar00rootroot00000000000000#!/bin/bash # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=10.2 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}/Prod/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.89_441.22_win10.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" 
rm -f "${CUDA_FILE}" pytorch_cluster-1.6.3/.github/workflows/cuda/cu111-Linux-env.sh000066400000000000000000000003231451151623700243370ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/usr/local/cuda-11.1 LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} PATH=${CUDA_HOME}/bin:${PATH} export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" pytorch_cluster-1.6.3/.github/workflows/cuda/cu111-Linux.sh000077500000000000000000000012711451151623700235570ustar00rootroot00000000000000#!/bin/bash OS=ubuntu1804 wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/11.1.1/local_installers/cuda-repo-${OS}-11-1-local_11.1.1-455.32.00-1_amd64.deb sudo dpkg -i cuda-repo-${OS}-11-1-local_11.1.1-455.32.00-1_amd64.deb sudo apt-key add /var/cuda-repo-${OS}-11-1-local/7fa2af80.pub sudo apt-get -qq update sudo apt install cuda-nvcc-11-1 cuda-libraries-dev-11-1 sudo apt clean rm -f https://developer.download.nvidia.com/compute/cuda/11.1.1/local_installers/cuda-repo-${OS}-11-1-local_11.1.1-455.32.00-1_amd64.deb pytorch_cluster-1.6.3/.github/workflows/cuda/cu111-Windows-env.sh000066400000000000000000000004441451151623700246760ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.1 PATH=${CUDA_HOME}/bin:$PATH PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" pytorch_cluster-1.6.3/.github/workflows/cuda/cu111-Windows.sh000077500000000000000000000020701451151623700241100ustar00rootroot00000000000000#!/bin/bash # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L 
"https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=11.1 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.1/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.1_456.81_win10.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" rm -f "${CUDA_FILE}" pytorch_cluster-1.6.3/.github/workflows/cuda/cu113-Linux-env.sh000066400000000000000000000003231451151623700243410ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/usr/local/cuda-11.3 LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} PATH=${CUDA_HOME}/bin:${PATH} export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" pytorch_cluster-1.6.3/.github/workflows/cuda/cu113-Linux.sh000077500000000000000000000012711451151623700235610ustar00rootroot00000000000000#!/bin/bash OS=ubuntu1804 wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb sudo dpkg -i cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb sudo apt-key add /var/cuda-repo-${OS}-11-3-local/7fa2af80.pub sudo apt-get -qq update sudo apt install cuda-nvcc-11-3 cuda-libraries-dev-11-3 sudo apt clean rm -f 
https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb pytorch_cluster-1.6.3/.github/workflows/cuda/cu113-Windows-env.sh000066400000000000000000000004441451151623700247000ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.3 PATH=${CUDA_HOME}/bin:$PATH PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" pytorch_cluster-1.6.3/.github/workflows/cuda/cu113-Windows.sh000077500000000000000000000021151451151623700241120ustar00rootroot00000000000000#!/bin/bash # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=11.3 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" 
rm -f "${CUDA_FILE}" pytorch_cluster-1.6.3/.github/workflows/cuda/cu115-Linux-env.sh000066400000000000000000000003231451151623700243430ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/usr/local/cuda-11.5 LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} PATH=${CUDA_HOME}/bin:${PATH} export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" pytorch_cluster-1.6.3/.github/workflows/cuda/cu115-Linux.sh000077500000000000000000000012711451151623700235630ustar00rootroot00000000000000#!/bin/bash OS=ubuntu1804 wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/11.5.2/local_installers/cuda-repo-${OS}-11-5-local_11.5.2-495.29.05-1_amd64.deb sudo dpkg -i cuda-repo-${OS}-11-5-local_11.5.2-495.29.05-1_amd64.deb sudo apt-key add /var/cuda-repo-${OS}-11-5-local/7fa2af80.pub sudo apt-get -qq update sudo apt install cuda-nvcc-11-5 cuda-libraries-dev-11-5 sudo apt clean rm -f https://developer.download.nvidia.com/compute/cuda/11.5.2/local_installers/cuda-repo-${OS}-11-5-local_11.5.2-495.29.05-1_amd64.deb pytorch_cluster-1.6.3/.github/workflows/cuda/cu115-Windows-env.sh000066400000000000000000000004141451151623700246770ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.3 PATH=${CUDA_HOME}/bin:$PATH PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="6.0+PTX" pytorch_cluster-1.6.3/.github/workflows/cuda/cu115-Windows.sh000077500000000000000000000022221451151623700241130ustar00rootroot00000000000000#!/bin/bash # TODO We currently use CUDA 11.3 to build CUDA 11.5 Windows wheels # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L 
"https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=11.3 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" rm -f "${CUDA_FILE}" pytorch_cluster-1.6.3/.github/workflows/cuda/cu116-Linux-env.sh000066400000000000000000000003231451151623700243440ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/usr/local/cuda-11.6 LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} PATH=${CUDA_HOME}/bin:${PATH} export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" pytorch_cluster-1.6.3/.github/workflows/cuda/cu116-Linux.sh000077500000000000000000000012711451151623700235640ustar00rootroot00000000000000#!/bin/bash OS=ubuntu1804 wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb sudo dpkg -i cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb sudo apt-key add /var/cuda-repo-${OS}-11-6-local/7fa2af80.pub sudo apt-get -qq update sudo apt install cuda-nvcc-11-6 cuda-libraries-dev-11-6 sudo apt clean rm -f 
https://developer.download.nvidia.com/compute/cuda/11.5.2/local_installers/cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb pytorch_cluster-1.6.3/.github/workflows/cuda/cu116-Windows-env.sh000066400000000000000000000004141451151623700247000ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.3 PATH=${CUDA_HOME}/bin:$PATH PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="6.0+PTX" pytorch_cluster-1.6.3/.github/workflows/cuda/cu116-Windows.sh000077500000000000000000000022221451151623700241140ustar00rootroot00000000000000#!/bin/bash # TODO We currently use CUDA 11.3 to build CUDA 11.6 Windows wheels # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=11.3 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" 
rm -f "${CUDA_FILE}" pytorch_cluster-1.6.3/.github/workflows/cuda/cu117-Linux-env.sh000066400000000000000000000003231451151623700243450ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/usr/local/cuda-11.7 LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} PATH=${CUDA_HOME}/bin:${PATH} export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" pytorch_cluster-1.6.3/.github/workflows/cuda/cu117-Linux.sh000077500000000000000000000013131451151623700235620ustar00rootroot00000000000000#!/bin/bash OS=ubuntu2004 wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda-repo-${OS}-11-7-local_11.7.1-515.65.01-1_amd64.deb sudo dpkg -i cuda-repo-${OS}-11-7-local_11.7.1-515.65.01-1_amd64.deb sudo cp /var/cuda-repo-${OS}-11-7-local/cuda-*-keyring.gpg /usr/share/keyrings/ sudo apt-get -qq update sudo apt install cuda-nvcc-11-7 cuda-libraries-dev-11-7 sudo apt clean rm -f https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda-repo-${OS}-11-7-local_11.7.1-515.65.01-1_amd64.deb pytorch_cluster-1.6.3/.github/workflows/cuda/cu117-Windows-env.sh000066400000000000000000000004141451151623700247010ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.7 PATH=${CUDA_HOME}/bin:$PATH PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="6.0+PTX" pytorch_cluster-1.6.3/.github/workflows/cuda/cu117-Windows.sh000077500000000000000000000021171451151623700241200ustar00rootroot00000000000000#!/bin/bash # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L 
"https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=11.7 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.1/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.1_516.94_windows.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" rm -f "${CUDA_FILE}" pytorch_cluster-1.6.3/.github/workflows/cuda/cu118-Linux-env.sh000066400000000000000000000003231451151623700243460ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/usr/local/cuda-11.8 LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} PATH=${CUDA_HOME}/bin:${PATH} export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" pytorch_cluster-1.6.3/.github/workflows/cuda/cu118-Linux.sh000077500000000000000000000013131451151623700235630ustar00rootroot00000000000000#!/bin/bash OS=ubuntu2004 wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda-repo-${OS}-11-8-local_11.8.0-520.61.05-1_amd64.deb sudo dpkg -i cuda-repo-${OS}-11-8-local_11.8.0-520.61.05-1_amd64.deb sudo cp /var/cuda-repo-${OS}-11-8-local/cuda-*-keyring.gpg /usr/share/keyrings/ sudo apt-get -qq update sudo apt install cuda-nvcc-11-8 cuda-libraries-dev-11-8 sudo 
apt clean rm -f https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda-repo-${OS}-11-8-local_11.8.0-520.61.05-1_amd64.deb pytorch_cluster-1.6.3/.github/workflows/cuda/cu118-Windows-env.sh000066400000000000000000000004141451151623700247020ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.8 PATH=${CUDA_HOME}/bin:$PATH PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="6.0+PTX" pytorch_cluster-1.6.3/.github/workflows/cuda/cu118-Windows.sh000077500000000000000000000021171451151623700241210ustar00rootroot00000000000000#!/bin/bash # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=11.8 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.0_522.06_windows.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" 
rm -f "${CUDA_FILE}" pytorch_cluster-1.6.3/.github/workflows/cuda/cu121-Linux-env.sh000066400000000000000000000003231451151623700243400ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/usr/local/cuda-12.1 LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} PATH=${CUDA_HOME}/bin:${PATH} export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="5.0+PTX;6.0;7.0;7.5;8.0;8.6;9.0" pytorch_cluster-1.6.3/.github/workflows/cuda/cu121-Linux.sh000077500000000000000000000013131451151623700235550ustar00rootroot00000000000000#!/bin/bash OS=ubuntu2004 wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda-repo-${OS}-12-1-local_12.1.1-530.30.02-1_amd64.deb sudo dpkg -i cuda-repo-${OS}-12-1-local_12.1.1-530.30.02-1_amd64.deb sudo cp /var/cuda-repo-${OS}-12-1-local/cuda-*-keyring.gpg /usr/share/keyrings/ sudo apt-get -qq update sudo apt install cuda-nvcc-12-1 cuda-libraries-dev-12-1 sudo apt clean rm -f https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda-repo-${OS}-12-1-local_12.1.1-530.30.02-1_amd64.deb pytorch_cluster-1.6.3/.github/workflows/cuda/cu121-Windows-env.sh000066400000000000000000000004141451151623700246740ustar00rootroot00000000000000#!/bin/bash CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v12.1 PATH=${CUDA_HOME}/bin:$PATH PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH export FORCE_CUDA=1 export TORCH_CUDA_ARCH_LIST="6.0+PTX" pytorch_cluster-1.6.3/.github/workflows/cuda/cu121-Windows.sh000077500000000000000000000021171451151623700241130ustar00rootroot00000000000000#!/bin/bash # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L 
"https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=12.1 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.1/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.1_531.14_windows.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" rm -f "${CUDA_FILE}" pytorch_cluster-1.6.3/.github/workflows/linting.yml000066400000000000000000000006371451151623700225510ustar00rootroot00000000000000name: Linting on: push: branches: - master pull_request: jobs: flake8: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Set up Python uses: actions/setup-python@v2 with: python-version: 3.8 - name: Install dependencies run: | pip install flake8 - name: Run linting run: | flake8 . pytorch_cluster-1.6.3/.github/workflows/stale.yml000066400000000000000000000014261451151623700222120ustar00rootroot00000000000000name: "Close stale issues and PRs" on: schedule: # Every day at 00:00 - cron: "0 0 * * *" workflow_dispatch: jobs: stale: runs-on: ubuntu-latest steps: - uses: actions/stale@v4.0.0 with: stale-issue-message: 'This issue had no activity for **6 months**. It will be closed in **2 weeks** unless there is some new activity. Is this issue already resolved?' 
stale-issue-label: 'stale' exempt-issue-labels: 'bug,enhancement,good first issue' stale-pr-message: 'This pull request had no activity for **6 months**. It will be closed in **2 weeks** unless there is some new activity.' stale-pr-label: 'stale' days-before-stale: 180 days-before-close: 14 operations-per-run: 200 pytorch_cluster-1.6.3/.github/workflows/testing.yml000066400000000000000000000021011451151623700225460ustar00rootroot00000000000000name: Testing on: push: branches: - master pull_request: jobs: pytest: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-latest, windows-latest] python-version: [3.8] torch-version: [2.0.0, 2.1.0] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Install PyTorch ${{ matrix.torch-version }} run: | pip install torch==${{ matrix.torch-version }} --extra-index-url https://download.pytorch.org/whl/cpu - name: Install main package run: | pip install scipy==1.10.1 # Python 3.8 support python setup.py develop - name: Run test-suite run: | pip install pytest pytest-cov pytest --cov --cov-report=xml - name: Upload coverage uses: codecov/codecov-action@v1 if: success() with: fail_ci_if_error: false pytorch_cluster-1.6.3/.gitignore000066400000000000000000000001211451151623700167410ustar00rootroot00000000000000__pycache__/ _ext/ build/ dist/ alpha/ .cache/ .eggs/ *.egg-info/ .coverage *.so pytorch_cluster-1.6.3/CMakeLists.txt000066400000000000000000000054211451151623700175210ustar00rootroot00000000000000cmake_minimum_required(VERSION 3.0) project(torchcluster) set(CMAKE_CXX_STANDARD 14) set(TORCHCLUSTER_VERSION 1.6.3) option(WITH_CUDA "Enable CUDA support" OFF) option(WITH_PYTHON "Link to Python when building" ON) if(WITH_CUDA) enable_language(CUDA) add_definitions(-D__CUDA_NO_HALF_OPERATORS__) add_definitions(-DWITH_CUDA) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr") 
endif() if (WITH_PYTHON) add_definitions(-DWITH_PYTHON) find_package(Python3 COMPONENTS Development) endif() find_package(Torch REQUIRED) file(GLOB HEADERS csrc/*.h) file(GLOB OPERATOR_SOURCES csrc/*.* csrc/cpu/*.*) if(WITH_CUDA) file(GLOB OPERATOR_SOURCES ${OPERATOR_SOURCES} csrc/cuda/*.h csrc/cuda/*.cu) endif() add_library(${PROJECT_NAME} SHARED ${OPERATOR_SOURCES}) target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES}) if (WITH_PYTHON) target_link_libraries(${PROJECT_NAME} PRIVATE Python3::Python) endif() set_target_properties(${PROJECT_NAME} PROPERTIES EXPORT_NAME TorchCluster) target_include_directories(${PROJECT_NAME} INTERFACE "$" $) include(GNUInstallDirs) include(CMakePackageConfigHelpers) set(TORCHCLUSTER_CMAKECONFIG_INSTALL_DIR "share/cmake/TorchCluster" CACHE STRING "install path for TorchClusterConfig.cmake") configure_package_config_file(cmake/TorchClusterConfig.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/TorchClusterConfig.cmake" INSTALL_DESTINATION ${TORCHCLUSTER_CMAKECONFIG_INSTALL_DIR}) write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/TorchClusterConfigVersion.cmake VERSION ${TORCHCLUSTER_VERSION} COMPATIBILITY AnyNewerVersion) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/TorchClusterConfig.cmake ${CMAKE_CURRENT_BINARY_DIR}/TorchClusterConfigVersion.cmake DESTINATION ${TORCHCLUSTER_CMAKECONFIG_INSTALL_DIR}) install(TARGETS ${PROJECT_NAME} EXPORT TorchClusterTargets LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ) install(EXPORT TorchClusterTargets NAMESPACE TorchCluster:: DESTINATION ${TORCHCLUSTER_CMAKECONFIG_INSTALL_DIR}) install(FILES ${HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}) install(FILES csrc/cpu/fps_cpu.h csrc/cpu/graclus_cpu.h csrc/cpu/grid_cpu.h csrc/cpu/rw_cpu.h csrc/cpu/sampler_cpu.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/cpu) if(WITH_CUDA) install(FILES csrc/cuda/fps_cuda.h csrc/cuda/graclus_cuda.h csrc/cuda/grid_cuda.h csrc/cuda/knn_cuda.h csrc/cuda/nearest_cuda.h 
csrc/cuda/radius_cuda.h csrc/cuda/rw_cuda.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/cuda) endif() if(WITH_CUDA) set_property(TARGET torch_cuda PROPERTY INTERFACE_COMPILE_OPTIONS "") set_property(TARGET torch_cpu PROPERTY INTERFACE_COMPILE_OPTIONS "") endif() pytorch_cluster-1.6.3/LICENSE000066400000000000000000000020761451151623700157710ustar00rootroot00000000000000Copyright (c) 2020 Matthias Fey Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
pytorch_cluster-1.6.3/MANIFEST.in000066400000000000000000000001251451151623700165130ustar00rootroot00000000000000include README.md include LICENSE recursive-exclude test * recursive-include csrc * pytorch_cluster-1.6.3/README.md000066400000000000000000000237551451151623700162520ustar00rootroot00000000000000[pypi-image]: https://badge.fury.io/py/torch-cluster.svg [pypi-url]: https://pypi.python.org/pypi/torch-cluster [testing-image]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/testing.yml/badge.svg [testing-url]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/testing.yml [linting-image]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/linting.yml/badge.svg [linting-url]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/linting.yml [coverage-image]: https://codecov.io/gh/rusty1s/pytorch_cluster/branch/master/graph/badge.svg [coverage-url]: https://codecov.io/github/rusty1s/pytorch_cluster?branch=master # PyTorch Cluster [![PyPI Version][pypi-image]][pypi-url] [![Testing Status][testing-image]][testing-url] [![Linting Status][linting-image]][linting-url] [![Code Coverage][coverage-image]][coverage-url] -------------------------------------------------------------------------------- This package consists of a small extension library of highly optimized graph cluster algorithms for the use in [PyTorch](http://pytorch.org/). 
The package consists of the following clustering algorithms: * **[Graclus](#graclus)** from Dhillon *et al.*: [Weighted Graph Cuts without Eigenvectors: A Multilevel Approach](http://www.cs.utexas.edu/users/inderjit/public_papers/multilevel_pami.pdf) (PAMI 2007) * **[Voxel Grid Pooling](#voxelgrid)** from, *e.g.*, Simonovsky and Komodakis: [Dynamic Edge-Conditioned Filters in Convolutional Neural Networks on Graphs](https://arxiv.org/abs/1704.02901) (CVPR 2017) * **[Iterative Farthest Point Sampling](#farthestpointsampling)** from, *e.g.* Qi *et al.*: [PointNet++: Deep Hierarchical Feature Learning on Point Sets in a Metric Space](https://arxiv.org/abs/1706.02413) (NIPS 2017) * **[k-NN](#knn-graph)** and **[Radius](#radius-graph)** graph generation * Clustering based on **[Nearest](#nearest)** points * **[Random Walk Sampling](#randomwalk-sampling)** from, *e.g.*, Grover and Leskovec: [node2vec: Scalable Feature Learning for Networks](https://arxiv.org/abs/1607.00653) (KDD 2016) All included operations work on varying data types and are implemented both for CPU and GPU. ## Installation ### Anaconda **Update:** You can now install `pytorch-cluster` via [Anaconda](https://anaconda.org/pyg/pytorch-cluster) for all major OS/PyTorch/CUDA combinations 🤗 Given that you have [`pytorch >= 1.8.0` installed](https://pytorch.org/get-started/locally/), simply run ``` conda install pytorch-cluster -c pyg ``` ### Binaries We alternatively provide pip wheels for all major OS/PyTorch/CUDA combinations, see [here](https://data.pyg.org/whl). #### PyTorch 2.1 To install the binaries for PyTorch 2.1.0, simply run ``` pip install torch-cluster -f https://data.pyg.org/whl/torch-2.1.0+${CUDA}.html ``` where `${CUDA}` should be replaced by either `cpu`, `cu118`, or `cu121` depending on your PyTorch installation. 
| | `cpu` | `cu118` | `cu121` | |-------------|-------|---------|---------| | **Linux** | ✅ | ✅ | ✅ | | **Windows** | ✅ | ✅ | ✅ | | **macOS** | ✅ | | | #### PyTorch 2.0 To install the binaries for PyTorch 2.0.0, simply run ``` pip install torch-cluster -f https://data.pyg.org/whl/torch-2.0.0+${CUDA}.html ``` where `${CUDA}` should be replaced by either `cpu`, `cu117`, or `cu118` depending on your PyTorch installation. | | `cpu` | `cu117` | `cu118` | |-------------|-------|---------|---------| | **Linux** | ✅ | ✅ | ✅ | | **Windows** | ✅ | ✅ | ✅ | | **macOS** | ✅ | | | **Note:** Binaries of older versions are also provided for PyTorch 1.4.0, PyTorch 1.5.0, PyTorch 1.6.0, PyTorch 1.7.0/1.7.1, PyTorch 1.8.0/1.8.1, PyTorch 1.9.0, PyTorch 1.10.0/1.10.1/1.10.2, PyTorch 1.11.0, PyTorch 1.12.0/1.12.1 and PyTorch 1.13.0/1.13.1 (following the same procedure). For older versions, you need to explicitly specify the latest supported version number or install via `pip install --no-index` in order to prevent a manual installation from source. You can look up the latest supported version number [here](https://data.pyg.org/whl). ### From source Ensure that at least PyTorch 1.4.0 is installed and verify that `cuda/bin` and `cuda/include` are in your `$PATH` and `$CPATH` respectively, *e.g.*: ``` $ python -c "import torch; print(torch.__version__)" >>> 1.4.0 $ python -c "import torch; print(torch.__version__)" >>> 1.1.0 $ echo $PATH >>> /usr/local/cuda/bin:... $ echo $CPATH >>> /usr/local/cuda/include:... ``` Then run: ``` pip install torch-cluster ``` When running in a docker container without NVIDIA driver, PyTorch needs to evaluate the compute capabilities and may fail. 
In this case, ensure that the compute capabilities are set via `TORCH_CUDA_ARCH_LIST`, *e.g.*: ``` export TORCH_CUDA_ARCH_LIST = "6.0 6.1 7.2+PTX 7.5+PTX" ``` ## Functions ### Graclus A greedy clustering algorithm of picking an unmarked vertex and matching it with one its unmarked neighbors (that maximizes its edge weight). The GPU algorithm is adapted from Fagginger Auer and Bisseling: [A GPU Algorithm for Greedy Graph Matching](http://www.staff.science.uu.nl/~bisse101/Articles/match12.pdf) (LNCS 2012) ```python import torch from torch_cluster import graclus_cluster row = torch.tensor([0, 1, 1, 2]) col = torch.tensor([1, 0, 2, 1]) weight = torch.tensor([1., 1., 1., 1.]) # Optional edge weights. cluster = graclus_cluster(row, col, weight) ``` ``` print(cluster) tensor([0, 0, 1]) ``` ### VoxelGrid A clustering algorithm, which overlays a regular grid of user-defined size over a point cloud and clusters all points within a voxel. ```python import torch from torch_cluster import grid_cluster pos = torch.tensor([[0., 0.], [11., 9.], [2., 8.], [2., 2.], [8., 3.]]) size = torch.Tensor([5, 5]) cluster = grid_cluster(pos, size) ``` ``` print(cluster) tensor([0, 5, 3, 0, 1]) ``` ### FarthestPointSampling A sampling algorithm, which iteratively samples the most distant point with regard to the rest points. ```python import torch from torch_cluster import fps x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]]) batch = torch.tensor([0, 0, 0, 0]) index = fps(x, batch, ratio=0.5, random_start=False) ``` ``` print(index) tensor([0, 3]) ``` ### kNN-Graph Computes graph edges to the nearest *k* points. **Args:** * **x** *(Tensor)*: Node feature matrix of shape `[N, F]`. * **k** *(int)*: The number of neighbors. * **batch** *(LongTensor, optional)*: Batch vector of shape `[N]`, which assigns each node to a specific example. `batch` needs to be sorted. (default: `None`) * **loop** *(bool, optional)*: If `True`, the graph will contain self-loops. 
(default: `False`) * **flow** *(string, optional)*: The flow direction when using in combination with message passing (`"source_to_target"` or `"target_to_source"`). (default: `"source_to_target"`) * **cosine** *(boolean, optional)*: If `True`, will use the Cosine distance instead of Euclidean distance to find nearest neighbors. (default: `False`) * **num_workers** *(int)*: Number of workers to use for computation. Has no effect in case `batch` is not `None`, or the input lies on the GPU. (default: `1`) ```python import torch from torch_cluster import knn_graph x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]]) batch = torch.tensor([0, 0, 0, 0]) edge_index = knn_graph(x, k=2, batch=batch, loop=False) ``` ``` print(edge_index) tensor([[1, 2, 0, 3, 0, 3, 1, 2], [0, 0, 1, 1, 2, 2, 3, 3]]) ``` ### Radius-Graph Computes graph edges to all points within a given distance. **Args:** * **x** *(Tensor)*: Node feature matrix of shape `[N, F]`. * **r** *(float)*: The radius. * **batch** *(LongTensor, optional)*: Batch vector of shape `[N]`, which assigns each node to a specific example. `batch` needs to be sorted. (default: `None`) * **loop** *(bool, optional)*: If `True`, the graph will contain self-loops. (default: `False`) * **max_num_neighbors** *(int, optional)*: The maximum number of neighbors to return for each element. If the number of actual neighbors is greater than `max_num_neighbors`, returned neighbors are picked randomly. (default: `32`) * **flow** *(string, optional)*: The flow direction when using in combination with message passing (`"source_to_target"` or `"target_to_source"`). (default: `"source_to_target"`) * **num_workers** *(int)*: Number of workers to use for computation. Has no effect in case `batch` is not `None`, or the input lies on the GPU. 
(default: `1`) ```python import torch from torch_cluster import radius_graph x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]]) batch = torch.tensor([0, 0, 0, 0]) edge_index = radius_graph(x, r=2.5, batch=batch, loop=False) ``` ``` print(edge_index) tensor([[1, 2, 0, 3, 0, 3, 1, 2], [0, 0, 1, 1, 2, 2, 3, 3]]) ``` ### Nearest Clusters points in *x* together which are nearest to a given query point in *y*. `batch_{x,y}` vectors need to be sorted. ```python import torch from torch_cluster import nearest x = torch.Tensor([[-1, -1], [-1, 1], [1, -1], [1, 1]]) batch_x = torch.tensor([0, 0, 0, 0]) y = torch.Tensor([[-1, 0], [1, 0]]) batch_y = torch.tensor([0, 0]) cluster = nearest(x, y, batch_x, batch_y) ``` ``` print(cluster) tensor([0, 0, 1, 1]) ``` ### RandomWalk-Sampling Samples random walks of length `walk_length` from all node indices in `start` in the graph given by `(row, col)`. ```python import torch from torch_cluster import random_walk row = torch.tensor([0, 1, 1, 1, 2, 2, 3, 3, 4, 4]) col = torch.tensor([1, 0, 2, 3, 1, 4, 1, 4, 2, 3]) start = torch.tensor([0, 1, 2, 3, 4]) walk = random_walk(row, col, start, walk_length=3) ``` ``` print(walk) tensor([[0, 1, 2, 4], [1, 3, 4, 2], [2, 4, 2, 1], [3, 4, 2, 4], [4, 3, 1, 0]]) ``` ## Running tests ``` pytest ``` ## C++ API `torch-cluster` also offers a C++ API that contains C++ equivalent of python models. ``` export Torch_DIR=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` mkdir build cd build # Add -DWITH_CUDA=on support for the CUDA if needed cmake .. 
make make install ``` pytorch_cluster-1.6.3/cmake/000077500000000000000000000000001451151623700160375ustar00rootroot00000000000000pytorch_cluster-1.6.3/cmake/TorchClusterConfig.cmake.in000066400000000000000000000020511451151623700232130ustar00rootroot00000000000000# TorchClusterConfig.cmake # -------------------- # # Exported targets:: Cluster # @PACKAGE_INIT@ set(PN TorchCluster) set(${PN}_INCLUDE_DIR "${PACKAGE_PREFIX_DIR}/@CMAKE_INSTALL_INCLUDEDIR@") set(${PN}_LIBRARY "") set(${PN}_DEFINITIONS USING_${PN}) check_required_components(${PN}) if(NOT (CMAKE_VERSION VERSION_LESS 3.0)) #----------------------------------------------------------------------------- # Don't include targets if this file is being picked up by another # project which has already built this as a subproject #----------------------------------------------------------------------------- if(NOT TARGET ${PN}::TorchCluster) include("${CMAKE_CURRENT_LIST_DIR}/${PN}Targets.cmake") if(NOT TARGET torch_library) find_package(Torch REQUIRED) endif() if(NOT TARGET Python3::Python) find_package(Python3 COMPONENTS Development) endif() target_link_libraries(TorchCluster::TorchCluster INTERFACE ${TORCH_LIBRARIES} Python3::Python) if(@WITH_CUDA@) target_compile_definitions(TorchCluster::TorchCluster INTERFACE WITH_CUDA) endif() endif() endif() pytorch_cluster-1.6.3/conda/000077500000000000000000000000001451151623700160435ustar00rootroot00000000000000pytorch_cluster-1.6.3/conda/pytorch-cluster/000077500000000000000000000000001451151623700212125ustar00rootroot00000000000000pytorch_cluster-1.6.3/conda/pytorch-cluster/README.md000066400000000000000000000001151451151623700224660ustar00rootroot00000000000000``` ./build_conda.sh 3.9 2.1.0 cu118 # python, pytorch and cuda version ``` pytorch_cluster-1.6.3/conda/pytorch-cluster/build_conda.sh000077500000000000000000000031671451151623700240230ustar00rootroot00000000000000#!/bin/bash export PYTHON_VERSION=$1 export TORCH_VERSION=$2 export CUDA_VERSION=$3 export 
CONDA_PYTORCH_CONSTRAINT="pytorch==${TORCH_VERSION%.*}.*" if [ "${CUDA_VERSION}" = "cpu" ]; then export CONDA_CUDATOOLKIT_CONSTRAINT="cpuonly # [not osx]" else case $CUDA_VERSION in cu121) export CONDA_CUDATOOLKIT_CONSTRAINT="pytorch-cuda==12.1.*" ;; cu118) export CONDA_CUDATOOLKIT_CONSTRAINT="pytorch-cuda==11.8.*" ;; cu117) export CONDA_CUDATOOLKIT_CONSTRAINT="pytorch-cuda==11.7.*" ;; cu116) if [ "${TORCH_VERSION}" = "1.12.0" ]; then export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit==11.6.*" else export CONDA_CUDATOOLKIT_CONSTRAINT="pytorch-cuda==11.6.*" fi ;; cu115) export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit==11.5.*" ;; cu113) export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit==11.3.*" ;; cu111) export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit==11.1.*" ;; cu102) export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit==10.2.*" ;; cu101) export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit==10.1.*" ;; *) echo "Unrecognized CUDA_VERSION=$CUDA_VERSION" exit 1 ;; esac fi echo "PyTorch $TORCH_VERSION+$CUDA_VERSION" echo "- $CONDA_PYTORCH_CONSTRAINT" echo "- $CONDA_CUDATOOLKIT_CONSTRAINT" if [ "${TORCH_VERSION}" = "1.12.0" ] && [ "${CUDA_VERSION}" = "cu116" ]; then conda build . -c pytorch -c default -c nvidia -c conda-forge --output-folder "$HOME/conda-bld" else conda build . -c pytorch -c default -c nvidia --output-folder "$HOME/conda-bld" fi pytorch_cluster-1.6.3/conda/pytorch-cluster/meta.yaml000066400000000000000000000016121451151623700230240ustar00rootroot00000000000000package: name: pytorch-cluster version: 1.6.3 source: path: ../.. 
requirements: build: - {{ compiler('c') }} # [win] host: - pip - python {{ environ.get('PYTHON_VERSION') }} - {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }} - {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }} run: - python {{ environ.get('PYTHON_VERSION') }} - {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }} - {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }} build: string: py{{ environ.get('PYTHON_VERSION').replace('.', '') }}_torch_{{ environ['TORCH_VERSION'] }}_{{ environ['CUDA_VERSION'] }} script: pip install . script_env: - FORCE_CUDA - TORCH_CUDA_ARCH_LIST test: imports: - torch_cluster requires: - scipy about: home: https://github.com/rusty1s/pytorch_cluster license: MIT summary: PyTorch Extension Library of Optimized Graph Cluster Algorithms pytorch_cluster-1.6.3/csrc/000077500000000000000000000000001451151623700157115ustar00rootroot00000000000000pytorch_cluster-1.6.3/csrc/cluster.h000066400000000000000000000027171451151623700175520ustar00rootroot00000000000000#pragma once #include "extensions.h" namespace cluster { CLUSTER_API int64_t cuda_version() noexcept; namespace detail { CLUSTER_INLINE_VARIABLE int64_t _cuda_version = cuda_version(); } // namespace detail } // namespace cluster CLUSTER_API torch::Tensor fps(torch::Tensor src, torch::Tensor ptr, double ratio, bool random_start); CLUSTER_API torch::Tensor graclus(torch::Tensor rowptr, torch::Tensor col, torch::optional optional_weight); CLUSTER_API torch::Tensor grid(torch::Tensor pos, torch::Tensor size, torch::optional optional_start, torch::optional optional_end); CLUSTER_API torch::Tensor knn(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x, torch::Tensor ptr_y, int64_t k, bool cosine); CLUSTER_API torch::Tensor nearest(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x, torch::Tensor ptr_y); CLUSTER_API torch::Tensor radius(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x, torch::Tensor ptr_y, double r, int64_t max_num_neighbors); CLUSTER_API std::tuple random_walk(torch::Tensor rowptr, 
torch::Tensor col, torch::Tensor start, int64_t walk_length, double p, double q); CLUSTER_API torch::Tensor neighbor_sampler(torch::Tensor start, torch::Tensor rowptr, int64_t count, double factor); pytorch_cluster-1.6.3/csrc/cpu/000077500000000000000000000000001451151623700165005ustar00rootroot00000000000000pytorch_cluster-1.6.3/csrc/cpu/fps_cpu.cpp000066400000000000000000000034161451151623700206470ustar00rootroot00000000000000#include "fps_cpu.h" #include #include "utils.h" inline torch::Tensor get_dist(torch::Tensor x, int64_t idx) { return (x - x[idx]).pow_(2).sum(1); } torch::Tensor fps_cpu(torch::Tensor src, torch::Tensor ptr, torch::Tensor ratio, bool random_start) { CHECK_CPU(src); CHECK_CPU(ptr); CHECK_CPU(ratio); CHECK_INPUT(ptr.dim() == 1); src = src.view({src.size(0), -1}).contiguous(); ptr = ptr.contiguous(); auto batch_size = ptr.numel() - 1; auto deg = ptr.narrow(0, 1, batch_size) - ptr.narrow(0, 0, batch_size); auto out_ptr = deg.toType(torch::kFloat) * ratio; out_ptr = out_ptr.ceil().toType(torch::kLong).cumsum(0); auto out = torch::empty({out_ptr[-1].data_ptr()[0]}, ptr.options()); auto ptr_data = ptr.data_ptr(); auto out_ptr_data = out_ptr.data_ptr(); auto out_data = out.data_ptr(); int64_t grain_size = 1; // Always parallelize over batch dimension. at::parallel_for(0, batch_size, grain_size, [&](int64_t begin, int64_t end) { int64_t src_start, src_end, out_start, out_end; for (int64_t b = begin; b < end; b++) { src_start = ptr_data[b], src_end = ptr_data[b + 1]; out_start = b == 0 ? 
0 : out_ptr_data[b - 1], out_end = out_ptr_data[b]; auto y = src.narrow(0, src_start, src_end - src_start); int64_t start_idx = 0; if (random_start) start_idx = rand() % y.size(0); out_data[out_start] = src_start + start_idx; auto dist = get_dist(y, start_idx); for (int64_t i = 1; i < out_end - out_start; i++) { int64_t argmax = dist.argmax().data_ptr()[0]; out_data[out_start + i] = src_start + argmax; dist = torch::min(dist, get_dist(y, argmax)); } } }); return out; } pytorch_cluster-1.6.3/csrc/cpu/fps_cpu.h000066400000000000000000000002451451151623700203110ustar00rootroot00000000000000#pragma once #include "../extensions.h" torch::Tensor fps_cpu(torch::Tensor src, torch::Tensor ptr, torch::Tensor ratio, bool random_start); pytorch_cluster-1.6.3/csrc/cpu/graclus_cpu.cpp000066400000000000000000000042601451151623700215150ustar00rootroot00000000000000#include "graclus_cpu.h" #include "utils.h" torch::Tensor graclus_cpu(torch::Tensor rowptr, torch::Tensor col, torch::optional optional_weight) { CHECK_CPU(rowptr); CHECK_CPU(col); CHECK_INPUT(rowptr.dim() == 1 && col.dim() == 1); if (optional_weight.has_value()) { CHECK_CPU(optional_weight.value()); CHECK_INPUT(optional_weight.value().dim() == 1); CHECK_INPUT(optional_weight.value().numel() == col.numel()); } int64_t num_nodes = rowptr.numel() - 1; auto out = torch::full(num_nodes, -1, rowptr.options()); auto node_perm = torch::randperm(num_nodes, rowptr.options()); auto rowptr_data = rowptr.data_ptr(); auto col_data = col.data_ptr(); auto node_perm_data = node_perm.data_ptr(); auto out_data = out.data_ptr(); if (!optional_weight.has_value()) { for (int64_t n = 0; n < num_nodes; n++) { auto u = node_perm_data[n]; if (out_data[u] >= 0) continue; out_data[u] = u; int64_t row_start = rowptr_data[u], row_end = rowptr_data[u + 1]; for (auto e = 0; e < row_end - row_start; e++) { auto v = col_data[row_start + e]; if (out_data[v] >= 0) continue; out_data[u] = std::min(u, v); out_data[v] = std::min(u, v); break; } } } else { 
auto weight = optional_weight.value(); auto scalar_type = weight.scalar_type(); AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, scalar_type, "graclus_cpu", [&] { auto weight_data = weight.data_ptr(); for (auto n = 0; n < num_nodes; n++) { auto u = node_perm_data[n]; if (out_data[u] >= 0) continue; auto v_max = u; scalar_t w_max = (scalar_t)0.; for (auto e = rowptr_data[u]; e < rowptr_data[u + 1]; e++) { auto v = col_data[e]; if (out_data[v] >= 0) continue; if (weight_data[e] >= w_max) { v_max = v; w_max = weight_data[e]; } } out_data[u] = std::min(u, v_max); out_data[v_max] = std::min(u, v_max); } }); } return out; } pytorch_cluster-1.6.3/csrc/cpu/graclus_cpu.h000066400000000000000000000002701451151623700211570ustar00rootroot00000000000000#pragma once #include "../extensions.h" torch::Tensor graclus_cpu(torch::Tensor rowptr, torch::Tensor col, torch::optional optional_weight); pytorch_cluster-1.6.3/csrc/cpu/grid_cpu.cpp000066400000000000000000000024611451151623700210030ustar00rootroot00000000000000#include "grid_cpu.h" #include "utils.h" torch::Tensor grid_cpu(torch::Tensor pos, torch::Tensor size, torch::optional optional_start, torch::optional optional_end) { CHECK_CPU(pos); CHECK_CPU(size); if (optional_start.has_value()) CHECK_CPU(optional_start.value()); if (optional_start.has_value()) CHECK_CPU(optional_start.value()); pos = pos.view({pos.size(0), -1}); CHECK_INPUT(size.numel() == pos.size(1)); if (!optional_start.has_value()) optional_start = std::get<0>(pos.min(0)); else CHECK_INPUT(optional_start.value().numel() == pos.size(1)); if (!optional_end.has_value()) optional_end = std::get<0>(pos.max(0)); else CHECK_INPUT(optional_start.value().numel() == pos.size(1)); auto start = optional_start.value(); auto end = optional_end.value(); pos = pos - start.unsqueeze(0); auto num_voxels = (end - start).true_divide(size).toType(torch::kLong) + 1; num_voxels = num_voxels.cumprod(0); num_voxels = torch::cat({torch::ones({1}, 
num_voxels.options()), num_voxels}, 0); num_voxels = num_voxels.narrow(0, 0, size.size(0)); auto out = pos.true_divide(size.view({1, -1})).toType(torch::kLong); out *= num_voxels.view({1, -1}); out = out.sum(1); return out; } pytorch_cluster-1.6.3/csrc/cpu/grid_cpu.h000066400000000000000000000003621451151623700204460ustar00rootroot00000000000000#pragma once #include "../extensions.h" torch::Tensor grid_cpu(torch::Tensor pos, torch::Tensor size, torch::optional optional_start, torch::optional optional_end); pytorch_cluster-1.6.3/csrc/cpu/knn_cpu.cpp000066400000000000000000000065141451151623700206470ustar00rootroot00000000000000#include "knn_cpu.h" #include "utils.h" #include "utils/KDTreeVectorOfVectorsAdaptor.h" #include "utils/nanoflann.hpp" torch::Tensor knn_cpu(torch::Tensor x, torch::Tensor y, torch::optional ptr_x, torch::optional ptr_y, int64_t k, int64_t num_workers) { CHECK_CPU(x); CHECK_INPUT(x.dim() == 2); CHECK_CPU(y); CHECK_INPUT(y.dim() == 2); if (ptr_x.has_value()) { CHECK_CPU(ptr_x.value()); CHECK_INPUT(ptr_x.value().dim() == 1); } if (ptr_y.has_value()) { CHECK_CPU(ptr_y.value()); CHECK_INPUT(ptr_y.value().dim() == 1); } std::vector out_vec = std::vector(); AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, x.scalar_type(), "knn_cpu", [&] { // See: nanoflann/examples/vector_of_vectors_example.cpp auto x_data = x.data_ptr(); auto y_data = y.data_ptr(); typedef std::vector> vec_t; if (!ptr_x.has_value()) { // Single example. 
vec_t pts(x.size(0)); for (int64_t i = 0; i < x.size(0); i++) { pts[i].resize(x.size(1)); for (int64_t j = 0; j < x.size(1); j++) { pts[i][j] = x_data[i * x.size(1) + j]; } } typedef KDTreeVectorOfVectorsAdaptor my_kd_tree_t; my_kd_tree_t mat_index(x.size(1), pts, 10); mat_index.index->buildIndex(); std::vector ret_index(k); std::vector out_dist_sqr(k); for (int64_t i = 0; i < y.size(0); i++) { size_t num_matches = mat_index.index->knnSearch( y_data + i * y.size(1), k, &ret_index[0], &out_dist_sqr[0]); for (size_t j = 0; j < num_matches; j++) { out_vec.push_back(ret_index[j]); out_vec.push_back(i); } } } else { // Batch-wise. auto ptr_x_data = ptr_x.value().data_ptr(); auto ptr_y_data = ptr_y.value().data_ptr(); for (int64_t b = 0; b < ptr_x.value().size(0) - 1; b++) { auto x_start = ptr_x_data[b], x_end = ptr_x_data[b + 1]; auto y_start = ptr_y_data[b], y_end = ptr_y_data[b + 1]; if (x_start == x_end || y_start == y_end) continue; vec_t pts(x_end - x_start); for (int64_t i = 0; i < x_end - x_start; i++) { pts[i].resize(x.size(1)); for (int64_t j = 0; j < x.size(1); j++) { pts[i][j] = x_data[(i + x_start) * x.size(1) + j]; } } typedef KDTreeVectorOfVectorsAdaptor my_kd_tree_t; my_kd_tree_t mat_index(x.size(1), pts, 10); mat_index.index->buildIndex(); std::vector ret_index(k); std::vector out_dist_sqr(k); for (int64_t i = y_start; i < y_end; i++) { size_t num_matches = mat_index.index->knnSearch( y_data + i * y.size(1), k, &ret_index[0], &out_dist_sqr[0]); for (size_t j = 0; j < num_matches; j++) { out_vec.push_back(x_start + ret_index[j]); out_vec.push_back(i); } } } } }); const int64_t size = out_vec.size() / 2; auto out = torch::from_blob(out_vec.data(), {size, 2}, x.options().dtype(torch::kLong)); return out.t().index_select(0, torch::tensor({1, 0})); } pytorch_cluster-1.6.3/csrc/cpu/knn_cpu.h000066400000000000000000000004211451151623700203030ustar00rootroot00000000000000#pragma once #include "../extensions.h" torch::Tensor knn_cpu(torch::Tensor x, torch::Tensor 
y, torch::optional ptr_x, torch::optional ptr_y, int64_t k, int64_t num_workers); pytorch_cluster-1.6.3/csrc/cpu/radius_cpu.cpp000066400000000000000000000067641451151623700213570ustar00rootroot00000000000000#include "radius_cpu.h" #include "utils.h" #include "utils/KDTreeVectorOfVectorsAdaptor.h" #include "utils/nanoflann.hpp" torch::Tensor radius_cpu(torch::Tensor x, torch::Tensor y, torch::optional ptr_x, torch::optional ptr_y, double r, int64_t max_num_neighbors, int64_t num_workers) { CHECK_CPU(x); CHECK_INPUT(x.dim() == 2); CHECK_CPU(y); CHECK_INPUT(y.dim() == 2); if (ptr_x.has_value()) { CHECK_CPU(ptr_x.value()); CHECK_INPUT(ptr_x.value().dim() == 1); } if (ptr_y.has_value()) { CHECK_CPU(ptr_y.value()); CHECK_INPUT(ptr_y.value().dim() == 1); } std::vector out_vec = std::vector(); AT_DISPATCH_ALL_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, x.scalar_type(), "radius_cpu", [&] { // See: nanoflann/examples/vector_of_vectors_example.cpp auto x_data = x.data_ptr(); auto y_data = y.data_ptr(); typedef std::vector> vec_t; nanoflann::SearchParams params; params.sorted = false; if (!ptr_x.has_value()) { // Single example. vec_t pts(x.size(0)); for (int64_t i = 0; i < x.size(0); i++) { pts[i].resize(x.size(1)); for (int64_t j = 0; j < x.size(1); j++) { pts[i][j] = x_data[i * x.size(1) + j]; } } typedef KDTreeVectorOfVectorsAdaptor my_kd_tree_t; my_kd_tree_t mat_index(x.size(1), pts, 10); mat_index.index->buildIndex(); for (int64_t i = 0; i < y.size(0); i++) { std::vector> ret_matches; size_t num_matches = mat_index.index->radiusSearch( y_data + i * y.size(1), r * r, ret_matches, params); for (size_t j = 0; j < std::min(num_matches, (size_t)max_num_neighbors); j++) { out_vec.push_back(ret_matches[j].first); out_vec.push_back(i); } } } else { // Batch-wise. 
auto ptr_x_data = ptr_x.value().data_ptr(); auto ptr_y_data = ptr_y.value().data_ptr(); for (int64_t b = 0; b < ptr_x.value().size(0) - 1; b++) { auto x_start = ptr_x_data[b], x_end = ptr_x_data[b + 1]; auto y_start = ptr_y_data[b], y_end = ptr_y_data[b + 1]; if (x_start == x_end || y_start == y_end) continue; vec_t pts(x_end - x_start); for (int64_t i = 0; i < x_end - x_start; i++) { pts[i].resize(x.size(1)); for (int64_t j = 0; j < x.size(1); j++) { pts[i][j] = x_data[(i + x_start) * x.size(1) + j]; } } typedef KDTreeVectorOfVectorsAdaptor my_kd_tree_t; my_kd_tree_t mat_index(x.size(1), pts, 10); mat_index.index->buildIndex(); for (int64_t i = y_start; i < y_end; i++) { std::vector> ret_matches; size_t num_matches = mat_index.index->radiusSearch( y_data + i * y.size(1), r * r, ret_matches, params); for (size_t j = 0; j < std::min(num_matches, (size_t)max_num_neighbors); j++) { out_vec.push_back(x_start + ret_matches[j].first); out_vec.push_back(i); } } } } }); const int64_t size = out_vec.size() / 2; auto out = torch::from_blob(out_vec.data(), {size, 2}, x.options().dtype(torch::kLong)); return out.t().index_select(0, torch::tensor({1, 0})); } pytorch_cluster-1.6.3/csrc/cpu/radius_cpu.h000066400000000000000000000004671451151623700210160ustar00rootroot00000000000000#pragma once #include "../extensions.h" torch::Tensor radius_cpu(torch::Tensor x, torch::Tensor y, torch::optional ptr_x, torch::optional ptr_y, double r, int64_t max_num_neighbors, int64_t num_workers); pytorch_cluster-1.6.3/csrc/cpu/rw_cpu.cpp000066400000000000000000000105551451151623700205110ustar00rootroot00000000000000#include "rw_cpu.h" #include #include "utils.h" void uniform_sampling(const int64_t *rowptr, const int64_t *col, const int64_t *start, int64_t *n_out, int64_t *e_out, const int64_t numel, const int64_t walk_length) { auto rand = torch::rand({numel, walk_length}); auto rand_data = rand.data_ptr(); int64_t grain_size = at::internal::GRAIN_SIZE / walk_length; at::parallel_for(0, numel, 
grain_size, [&](int64_t begin, int64_t end) { for (auto n = begin; n < end; n++) { int64_t n_cur = start[n], e_cur, row_start, row_end, idx; n_out[n * (walk_length + 1)] = n_cur; for (auto l = 0; l < walk_length; l++) { row_start = rowptr[n_cur], row_end = rowptr[n_cur + 1]; if (row_end - row_start == 0) { e_cur = -1; } else { idx = int64_t(rand_data[n * walk_length + l] * (row_end - row_start)); e_cur = row_start + idx; n_cur = col[e_cur]; } n_out[n * (walk_length + 1) + (l + 1)] = n_cur; e_out[n * walk_length + l] = e_cur; } } }); } bool inline is_neighbor(const int64_t *rowptr, const int64_t *col, int64_t v, int64_t w) { int64_t row_start = rowptr[v], row_end = rowptr[v + 1]; for (auto i = row_start; i < row_end; i++) { if (col[i] == w) return true; } return false; } // See: https://louisabraham.github.io/articles/node2vec-sampling.html void rejection_sampling(const int64_t *rowptr, const int64_t *col, int64_t *start, int64_t *n_out, int64_t *e_out, const int64_t numel, const int64_t walk_length, const double p, const double q) { double max_prob = fmax(fmax(1. / p, 1.), 1. / q); double prob_0 = 1. / p / max_prob; double prob_1 = 1. / max_prob; double prob_2 = 1. 
/ q / max_prob; int64_t grain_size = at::internal::GRAIN_SIZE / walk_length; at::parallel_for(0, numel, grain_size, [&](int64_t begin, int64_t end) { for (auto n = begin; n < end; n++) { int64_t t = start[n], v, x, e_cur, row_start, row_end; n_out[n * (walk_length + 1)] = t; row_start = rowptr[t], row_end = rowptr[t + 1]; if (row_end - row_start == 0) { e_cur = -1; v = t; } else { e_cur = row_start + (rand() % (row_end - row_start)); v = col[e_cur]; } n_out[n * (walk_length + 1) + 1] = v; e_out[n * walk_length] = e_cur; for (auto l = 1; l < walk_length; l++) { row_start = rowptr[v], row_end = rowptr[v + 1]; if (row_end - row_start == 0) { e_cur = -1; x = v; } else if (row_end - row_start == 1) { e_cur = row_start; x = col[e_cur]; } else { while (true) { e_cur = row_start + (rand() % (row_end - row_start)); x = col[e_cur]; auto r = ((double)rand() / (RAND_MAX)); // [0, 1) if (x == t && r < prob_0) break; else if (is_neighbor(rowptr, col, x, t) && r < prob_1) break; else if (r < prob_2) break; } } n_out[n * (walk_length + 1) + (l + 1)] = x; e_out[n * walk_length + l] = e_cur; t = v; v = x; } } }); } std::tuple random_walk_cpu(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start, int64_t walk_length, double p, double q) { CHECK_CPU(rowptr); CHECK_CPU(col); CHECK_CPU(start); CHECK_INPUT(rowptr.dim() == 1); CHECK_INPUT(col.dim() == 1); CHECK_INPUT(start.dim() == 1); auto n_out = torch::empty({start.size(0), walk_length + 1}, start.options()); auto e_out = torch::empty({start.size(0), walk_length}, start.options()); auto rowptr_data = rowptr.data_ptr(); auto col_data = col.data_ptr(); auto start_data = start.data_ptr(); auto n_out_data = n_out.data_ptr(); auto e_out_data = e_out.data_ptr(); if (p == 1. && q == 1.) 
{ uniform_sampling(rowptr_data, col_data, start_data, n_out_data, e_out_data, start.numel(), walk_length); } else { rejection_sampling(rowptr_data, col_data, start_data, n_out_data, e_out_data, start.numel(), walk_length, p, q); } return std::make_tuple(n_out, e_out); } pytorch_cluster-1.6.3/csrc/cpu/rw_cpu.h000066400000000000000000000003331451151623700201470ustar00rootroot00000000000000#pragma once #include "../extensions.h" std::tuple random_walk_cpu(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start, int64_t walk_length, double p, double q); pytorch_cluster-1.6.3/csrc/cpu/sampler_cpu.cpp000066400000000000000000000031041451151623700215140ustar00rootroot00000000000000#include "sampler_cpu.h" #include "utils.h" torch::Tensor neighbor_sampler_cpu(torch::Tensor start, torch::Tensor rowptr, int64_t count, double factor) { auto start_data = start.data_ptr(); auto rowptr_data = rowptr.data_ptr(); std::vector e_ids; for (auto i = 0; i < start.size(0); i++) { auto row_start = rowptr_data[start_data[i]]; auto row_end = rowptr_data[start_data[i] + 1]; auto num_neighbors = row_end - row_start; int64_t size = count; if (count < 1) size = int64_t(ceil(factor * float(num_neighbors))); if (size > num_neighbors) size = num_neighbors; // If the number of neighbors is approximately equal to the number of // neighbors which are requested, we use `randperm` to sample without // replacement, otherwise we sample random numbers into a set as long // as necessary. 
std::unordered_set set; if (size < 0.7 * float(num_neighbors)) { while (int64_t(set.size()) < size) { int64_t sample = rand() % num_neighbors; set.insert(sample + row_start); } std::vector v(set.begin(), set.end()); e_ids.insert(e_ids.end(), v.begin(), v.end()); } else { auto sample = torch::randperm(num_neighbors, start.options()); auto sample_data = sample.data_ptr(); for (auto j = 0; j < size; j++) { e_ids.push_back(sample_data[j] + row_start); } } } int64_t length = e_ids.size(); return torch::from_blob(e_ids.data(), {length}, start.options()).clone(); } pytorch_cluster-1.6.3/csrc/cpu/sampler_cpu.h000066400000000000000000000002721451151623700211640ustar00rootroot00000000000000#pragma once #include "../extensions.h" torch::Tensor neighbor_sampler_cpu(torch::Tensor start, torch::Tensor rowptr, int64_t count, double factor); pytorch_cluster-1.6.3/csrc/cpu/utils.h000066400000000000000000000004731451151623700200150ustar00rootroot00000000000000#pragma once #include "../extensions.h" #define CHECK_CPU(x) AT_ASSERTM(x.device().is_cpu(), #x " must be CPU tensor") #define CHECK_INPUT(x) AT_ASSERTM(x, "Input mismatch") #define CHECK_CONTIGUOUS(x) \ AT_ASSERTM(x.is_contiguous(), #x " must be contiguous") pytorch_cluster-1.6.3/csrc/cpu/utils/000077500000000000000000000000001451151623700176405ustar00rootroot00000000000000pytorch_cluster-1.6.3/csrc/cpu/utils/KDTreeVectorOfVectorsAdaptor.h000066400000000000000000000131361451151623700254640ustar00rootroot00000000000000/*********************************************************************** * Software License Agreement (BSD License) * * Copyright 2011-16 Jose Luis Blanco (joseluisblancoc@gmail.com). * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************************/ #pragma once #include "nanoflann.hpp" #include // ===== This example shows how to use nanoflann with these types of containers: // ======= // typedef std::vector > my_vector_of_vectors_t; // typedef std::vector my_vector_of_vectors_t; // This // requires #include // ===================================================================================== /** A simple vector-of-vectors adaptor for nanoflann, without duplicating the * storage. The i'th vector represents a point in the state space. * * \tparam DIM If set to >0, it specifies a compile-time fixed dimensionality * for the points in the data set, allowing more compiler optimizations. \tparam * num_t The type of the point coordinates (typically, double or float). \tparam * Distance The distance metric to use: nanoflann::metric_L1, * nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. 
\tparam IndexType The * type for indices in the KD-tree index (typically, size_t of int) */ template struct KDTreeVectorOfVectorsAdaptor { typedef KDTreeVectorOfVectorsAdaptor self_t; typedef typename Distance::template traits::distance_t metric_t; typedef nanoflann::KDTreeSingleIndexAdaptor index_t; index_t *index; //! The kd-tree index for the user to call its methods as //! usual with any other FLANN index. /// Constructor: takes a const ref to the vector of vectors object with the /// data points KDTreeVectorOfVectorsAdaptor(const size_t /* dimensionality */, const VectorOfVectorsType &mat, const int leaf_max_size = 10) : m_data(mat) { assert(mat.size() != 0 && mat[0].size() != 0); const size_t dims = mat[0].size(); if (DIM > 0 && static_cast(dims) != DIM) throw std::runtime_error( "Data set dimensionality does not match the 'DIM' template argument"); index = new index_t(static_cast(dims), *this /* adaptor */, nanoflann::KDTreeSingleIndexAdaptorParams(leaf_max_size)); index->buildIndex(); } ~KDTreeVectorOfVectorsAdaptor() { delete index; } const VectorOfVectorsType &m_data; /** Query for the \a num_closest closest points to a given point (entered as * query_point[0:dim-1]). Note that this is a short-cut method for * index->findNeighbors(). The user can also call index->... methods as * desired. \note nChecks_IGNORED is ignored but kept for compatibility with * the original FLANN interface. 
*/ inline void query(const num_t *query_point, const size_t num_closest, IndexType *out_indices, num_t *out_distances_sq, const int nChecks_IGNORED = 10) const { nanoflann::KNNResultSet resultSet(num_closest); resultSet.init(out_indices, out_distances_sq); index->findNeighbors(resultSet, query_point, nanoflann::SearchParams()); } /** @name Interface expected by KDTreeSingleIndexAdaptor * @{ */ const self_t &derived() const { return *this; } self_t &derived() { return *this; } // Must return the number of data points inline size_t kdtree_get_point_count() const { return m_data.size(); } // Returns the dim'th component of the idx'th point in the class: inline num_t kdtree_get_pt(const size_t idx, const size_t dim) const { return m_data[idx][dim]; } // Optional bounding-box computation: return false to default to a standard // bbox computation loop. // Return true if the BBOX was already computed by the class and returned in // "bb" so it can be avoided to redo it again. Look at bb.size() to find out // the expected dimensionality (e.g. 2 or 3 for point clouds) template bool kdtree_get_bbox(BBOX & /*bb*/) const { return false; } /** @} */ }; // end of KDTreeVectorOfVectorsAdaptor pytorch_cluster-1.6.3/csrc/cpu/utils/nanoflann.hpp000066400000000000000000002175351451151623700223400ustar00rootroot00000000000000/*********************************************************************** * Software License Agreement (BSD License) * * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved. * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved. * Copyright 2011-2016 Jose Luis Blanco (joseluisblancoc@gmail.com). * All rights reserved. * * THE BSD LICENSE * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************************/ /** \mainpage nanoflann C++ API documentation * nanoflann is a C++ header-only library for building KD-Trees, mostly * optimized for 2D or 3D point clouds. * * nanoflann does not require compiling or installing, just an * #include in your code. 
* * See: * - C++ API organized by modules * - Online README * - Doxygen * documentation */ #ifndef NANOFLANN_HPP_ #define NANOFLANN_HPP_ #include #include #include #include // for abs() #include // for fwrite() #include // for abs() #include #include // std::reference_wrapper #include #include /** Library version: 0xMmP (M=Major,m=minor,P=patch) */ #define NANOFLANN_VERSION 0x132 // Avoid conflicting declaration of min/max macros in windows headers #if !defined(NOMINMAX) && \ (defined(_WIN32) || defined(_WIN32_) || defined(WIN32) || defined(_WIN64)) #define NOMINMAX #ifdef max #undef max #undef min #endif #endif namespace nanoflann { /** @addtogroup nanoflann_grp nanoflann C++ library for ANN * @{ */ /** the PI constant (required to avoid MSVC missing symbols) */ template T pi_const() { return static_cast(3.14159265358979323846); } /** * Traits if object is resizable and assignable (typically has a resize | assign * method) */ template struct has_resize : std::false_type {}; template struct has_resize().resize(1), 0)> : std::true_type {}; template struct has_assign : std::false_type {}; template struct has_assign().assign(1, 0), 0)> : std::true_type {}; /** * Free function to resize a resizable object */ template inline typename std::enable_if::value, void>::type resize(Container &c, const size_t nElements) { c.resize(nElements); } /** * Free function that has no effects on non resizable containers (e.g. 
* std::array) It raises an exception if the expected size does not match */ template inline typename std::enable_if::value, void>::type resize(Container &c, const size_t nElements) { if (nElements != c.size()) throw std::logic_error("Try to change the size of a std::array."); } /** * Free function to assign to a container */ template inline typename std::enable_if::value, void>::type assign(Container &c, const size_t nElements, const T &value) { c.assign(nElements, value); } /** * Free function to assign to a std::array */ template inline typename std::enable_if::value, void>::type assign(Container &c, const size_t nElements, const T &value) { for (size_t i = 0; i < nElements; i++) c[i] = value; } /** @addtogroup result_sets_grp Result set classes * @{ */ template class KNNResultSet { public: typedef _DistanceType DistanceType; typedef _IndexType IndexType; typedef _CountType CountType; private: IndexType *indices; DistanceType *dists; CountType capacity; CountType count; public: inline KNNResultSet(CountType capacity_) : indices(0), dists(0), capacity(capacity_), count(0) {} inline void init(IndexType *indices_, DistanceType *dists_) { indices = indices_; dists = dists_; count = 0; if (capacity) dists[capacity - 1] = (std::numeric_limits::max)(); } inline CountType size() const { return count; } inline bool full() const { return count == capacity; } /** * Called during search to add an element matching the criteria. * @return true if the search should be continued, false if the results are * sufficient */ inline bool addPoint(DistanceType dist, IndexType index) { CountType i; for (i = count; i > 0; --i) { #ifdef NANOFLANN_FIRST_MATCH // If defined and two points have the same // distance, the one with the lowest-index will be // returned first. 
if ((dists[i - 1] > dist) || ((dist == dists[i - 1]) && (indices[i - 1] > index))) { #else if (dists[i - 1] > dist) { #endif if (i < capacity) { dists[i] = dists[i - 1]; indices[i] = indices[i - 1]; } } else break; } if (i < capacity) { dists[i] = dist; indices[i] = index; } if (count < capacity) count++; // tell caller that the search shall continue return true; } inline DistanceType worstDist() const { return dists[capacity - 1]; } }; /** operator "<" for std::sort() */ struct IndexDist_Sorter { /** PairType will be typically: std::pair */ template inline bool operator()(const PairType &p1, const PairType &p2) const { return p1.second < p2.second; } }; /** * A result-set class used when performing a radius based search. */ template class RadiusResultSet { public: typedef _DistanceType DistanceType; typedef _IndexType IndexType; public: const DistanceType radius; std::vector> &m_indices_dists; inline RadiusResultSet( DistanceType radius_, std::vector> &indices_dists) : radius(radius_), m_indices_dists(indices_dists) { init(); } inline void init() { clear(); } inline void clear() { m_indices_dists.clear(); } inline size_t size() const { return m_indices_dists.size(); } inline bool full() const { return true; } /** * Called during search to add an element matching the criteria. 
* @return true if the search should be continued, false if the results are * sufficient */ inline bool addPoint(DistanceType dist, IndexType index) { if (dist < radius) m_indices_dists.push_back(std::make_pair(index, dist)); return true; } inline DistanceType worstDist() const { return radius; } /** * Find the worst result (furtherest neighbor) without copying or sorting * Pre-conditions: size() > 0 */ std::pair worst_item() const { if (m_indices_dists.empty()) throw std::runtime_error("Cannot invoke RadiusResultSet::worst_item() on " "an empty list of results."); typedef typename std::vector>::const_iterator DistIt; DistIt it = std::max_element(m_indices_dists.begin(), m_indices_dists.end(), IndexDist_Sorter()); return *it; } }; /** @} */ /** @addtogroup loadsave_grp Load/save auxiliary functions * @{ */ template void save_value(FILE *stream, const T &value, size_t count = 1) { fwrite(&value, sizeof(value), count, stream); } template void save_value(FILE *stream, const std::vector &value) { size_t size = value.size(); fwrite(&size, sizeof(size_t), 1, stream); fwrite(&value[0], sizeof(T), size, stream); } template void load_value(FILE *stream, T &value, size_t count = 1) { size_t read_cnt = fread(&value, sizeof(value), count, stream); if (read_cnt != count) { throw std::runtime_error("Cannot read from file"); } } template void load_value(FILE *stream, std::vector &value) { size_t size; size_t read_cnt = fread(&size, sizeof(size_t), 1, stream); if (read_cnt != 1) { throw std::runtime_error("Cannot read from file"); } value.resize(size); read_cnt = fread(&value[0], sizeof(T), size, stream); if (read_cnt != size) { throw std::runtime_error("Cannot read from file"); } } /** @} */ /** @addtogroup metric_grp Metric (distance) classes * @{ */ struct Metric {}; /** Manhattan distance functor (generic version, optimized for * high-dimensionality data sets). Corresponding distance traits: * nanoflann::metric_L1 \tparam T Type of the elements (e.g. 
double, float, * uint8_t) \tparam _DistanceType Type of distance variables (must be signed) * (e.g. float, double, int64_t) */ template struct L1_Adaptor { typedef T ElementType; typedef _DistanceType DistanceType; const DataSource &data_source; L1_Adaptor(const DataSource &_data_source) : data_source(_data_source) {} inline DistanceType evalMetric(const T *a, const size_t b_idx, size_t size, DistanceType worst_dist = -1) const { DistanceType result = DistanceType(); const T *last = a + size; const T *lastgroup = last - 3; size_t d = 0; /* Process 4 items with each loop for efficiency. */ while (a < lastgroup) { const DistanceType diff0 = std::abs(a[0] - data_source.kdtree_get_pt(b_idx, d++)); const DistanceType diff1 = std::abs(a[1] - data_source.kdtree_get_pt(b_idx, d++)); const DistanceType diff2 = std::abs(a[2] - data_source.kdtree_get_pt(b_idx, d++)); const DistanceType diff3 = std::abs(a[3] - data_source.kdtree_get_pt(b_idx, d++)); result += diff0 + diff1 + diff2 + diff3; a += 4; if ((worst_dist > 0) && (result > worst_dist)) { return result; } } /* Process last 0-3 components. Not needed for standard vector lengths. */ while (a < last) { result += std::abs(*a++ - data_source.kdtree_get_pt(b_idx, d++)); } return result; } template inline DistanceType accum_dist(const U a, const V b, const size_t) const { return std::abs(a - b); } }; /** Squared Euclidean distance functor (generic version, optimized for * high-dimensionality data sets). Corresponding distance traits: * nanoflann::metric_L2 \tparam T Type of the elements (e.g. double, float, * uint8_t) \tparam _DistanceType Type of distance variables (must be signed) * (e.g. 
float, double, int64_t) */ template struct L2_Adaptor { typedef T ElementType; typedef _DistanceType DistanceType; const DataSource &data_source; L2_Adaptor(const DataSource &_data_source) : data_source(_data_source) {} inline DistanceType evalMetric(const T *a, const size_t b_idx, size_t size, DistanceType worst_dist = -1) const { DistanceType result = DistanceType(); const T *last = a + size; const T *lastgroup = last - 3; size_t d = 0; /* Process 4 items with each loop for efficiency. */ while (a < lastgroup) { const DistanceType diff0 = a[0] - data_source.kdtree_get_pt(b_idx, d++); const DistanceType diff1 = a[1] - data_source.kdtree_get_pt(b_idx, d++); const DistanceType diff2 = a[2] - data_source.kdtree_get_pt(b_idx, d++); const DistanceType diff3 = a[3] - data_source.kdtree_get_pt(b_idx, d++); result += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3; a += 4; if ((worst_dist > 0) && (result > worst_dist)) { return result; } } /* Process last 0-3 components. Not needed for standard vector lengths. */ while (a < last) { const DistanceType diff0 = *a++ - data_source.kdtree_get_pt(b_idx, d++); result += diff0 * diff0; } return result; } template inline DistanceType accum_dist(const U a, const V b, const size_t) const { return (a - b) * (a - b); } }; /** Squared Euclidean (L2) distance functor (suitable for low-dimensionality * datasets, like 2D or 3D point clouds) Corresponding distance traits: * nanoflann::metric_L2_Simple \tparam T Type of the elements (e.g. double, * float, uint8_t) \tparam _DistanceType Type of distance variables (must be * signed) (e.g. 
float, double, int64_t) */ template struct L2_Simple_Adaptor { typedef T ElementType; typedef _DistanceType DistanceType; const DataSource &data_source; L2_Simple_Adaptor(const DataSource &_data_source) : data_source(_data_source) {} inline DistanceType evalMetric(const T *a, const size_t b_idx, size_t size) const { DistanceType result = DistanceType(); for (size_t i = 0; i < size; ++i) { const DistanceType diff = a[i] - data_source.kdtree_get_pt(b_idx, i); result += diff * diff; } return result; } template inline DistanceType accum_dist(const U a, const V b, const size_t) const { return (a - b) * (a - b); } }; /** SO2 distance functor * Corresponding distance traits: nanoflann::metric_SO2 * \tparam T Type of the elements (e.g. double, float) * \tparam _DistanceType Type of distance variables (must be signed) (e.g. * float, double) orientation is constrained to be in [-pi, pi] */ template struct SO2_Adaptor { typedef T ElementType; typedef _DistanceType DistanceType; const DataSource &data_source; SO2_Adaptor(const DataSource &_data_source) : data_source(_data_source) {} inline DistanceType evalMetric(const T *a, const size_t b_idx, size_t size) const { return accum_dist(a[size - 1], data_source.kdtree_get_pt(b_idx, size - 1), size - 1); } /** Note: this assumes that input angles are already in the range [-pi,pi] */ template inline DistanceType accum_dist(const U a, const V b, const size_t) const { DistanceType result = DistanceType(); DistanceType PI = pi_const(); result = b - a; if (result > PI) result -= 2 * PI; else if (result < -PI) result += 2 * PI; return result; } }; /** SO3 distance functor (Uses L2_Simple) * Corresponding distance traits: nanoflann::metric_SO3 * \tparam T Type of the elements (e.g. double, float) * \tparam _DistanceType Type of distance variables (must be signed) (e.g. 
* float, double) */ template struct SO3_Adaptor { typedef T ElementType; typedef _DistanceType DistanceType; L2_Simple_Adaptor distance_L2_Simple; SO3_Adaptor(const DataSource &_data_source) : distance_L2_Simple(_data_source) {} inline DistanceType evalMetric(const T *a, const size_t b_idx, size_t size) const { return distance_L2_Simple.evalMetric(a, b_idx, size); } template inline DistanceType accum_dist(const U a, const V b, const size_t idx) const { return distance_L2_Simple.accum_dist(a, b, idx); } }; /** Metaprogramming helper traits class for the L1 (Manhattan) metric */ struct metric_L1 : public Metric { template struct traits { typedef L1_Adaptor distance_t; }; }; /** Metaprogramming helper traits class for the L2 (Euclidean) metric */ struct metric_L2 : public Metric { template struct traits { typedef L2_Adaptor distance_t; }; }; /** Metaprogramming helper traits class for the L2_simple (Euclidean) metric */ struct metric_L2_Simple : public Metric { template struct traits { typedef L2_Simple_Adaptor distance_t; }; }; /** Metaprogramming helper traits class for the SO3_InnerProdQuat metric */ struct metric_SO2 : public Metric { template struct traits { typedef SO2_Adaptor distance_t; }; }; /** Metaprogramming helper traits class for the SO3_InnerProdQuat metric */ struct metric_SO3 : public Metric { template struct traits { typedef SO3_Adaptor distance_t; }; }; /** @} */ /** @addtogroup param_grp Parameter structs * @{ */ /** Parameters (see README.md) */ struct KDTreeSingleIndexAdaptorParams { KDTreeSingleIndexAdaptorParams(size_t _leaf_max_size = 10) : leaf_max_size(_leaf_max_size) {} size_t leaf_max_size; }; /** Search options for KDTreeSingleIndexAdaptor::findNeighbors() */ struct SearchParams { /** Note: The first argument (checks_IGNORED_) is ignored, but kept for * compatibility with the FLANN interface */ SearchParams(int checks_IGNORED_ = 32, float eps_ = 0, bool sorted_ = true) : checks(checks_IGNORED_), eps(eps_), sorted(sorted_) {} int checks; 
//!< Ignored parameter (Kept for compatibility with the FLANN //!< interface). float eps; //!< search for eps-approximate neighbours (default: 0) bool sorted; //!< only for radius search, require neighbours sorted by //!< distance (default: true) }; /** @} */ /** @addtogroup memalloc_grp Memory allocation * @{ */ /** * Allocates (using C's malloc) a generic type T. * * Params: * count = number of instances to allocate. * Returns: pointer (of type T*) to memory buffer */ template inline T *allocate(size_t count = 1) { T *mem = static_cast(::malloc(sizeof(T) * count)); return mem; } /** * Pooled storage allocator * * The following routines allow for the efficient allocation of storage in * small chunks from a specified pool. Rather than allowing each structure * to be freed individually, an entire pool of storage is freed at once. * This method has two advantages over just using malloc() and free(). First, * it is far more efficient for allocating small objects, as there is * no overhead for remembering all the information needed to free each * object or consolidating fragmented memory. Second, the decision about * how long to keep an object is made at the time of allocation, and there * is no need to track down all the objects to free them. * */ const size_t WORDSIZE = 16; const size_t BLOCKSIZE = 8192; class PooledAllocator { /* We maintain memory alignment to word boundaries by requiring that all allocations be in multiples of the machine wordsize. */ /* Size of machine word in bytes. Must be power of 2. */ /* Minimum number of bytes requested at a time from the system. Must be * multiple of WORDSIZE. */ size_t remaining; /* Number of bytes left in current block of storage. */ void *base; /* Pointer to base of current block of storage. */ void *loc; /* Current location in block to next allocate memory. */ void internal_init() { remaining = 0; base = NULL; usedMemory = 0; wastedMemory = 0; } public: size_t usedMemory; size_t wastedMemory; /** Default constructor. 
Initializes a new pool. */ PooledAllocator() { internal_init(); } /** * Destructor. Frees all the memory allocated in this pool. */ ~PooledAllocator() { free_all(); } /** Frees all allocated memory chunks */ void free_all() { while (base != NULL) { void *prev = *(static_cast(base)); /* Get pointer to prev block. */ ::free(base); base = prev; } internal_init(); } /** * Returns a pointer to a piece of new memory of the given size in bytes * allocated from the pool. */ void *malloc(const size_t req_size) { /* Round size up to a multiple of wordsize. The following expression only works for WORDSIZE that is a power of 2, by masking last bits of incremented size to zero. */ const size_t size = (req_size + (WORDSIZE - 1)) & ~(WORDSIZE - 1); /* Check whether a new block must be allocated. Note that the first word of a block is reserved for a pointer to the previous block. */ if (size > remaining) { wastedMemory += remaining; /* Allocate new storage. */ const size_t blocksize = (size + sizeof(void *) + (WORDSIZE - 1) > BLOCKSIZE) ? size + sizeof(void *) + (WORDSIZE - 1) : BLOCKSIZE; // use the standard C malloc to allocate memory void *m = ::malloc(blocksize); if (!m) { fprintf(stderr, "Failed to allocate memory.\n"); return NULL; } /* Fill first word of new block with pointer to previous block. */ static_cast(m)[0] = base; base = m; size_t shift = 0; // int size_t = (WORDSIZE - ( (((size_t)m) + sizeof(void*)) & // (WORDSIZE-1))) & (WORDSIZE-1); remaining = blocksize - sizeof(void *) - shift; loc = (static_cast(m) + sizeof(void *) + shift); } void *rloc = loc; loc = static_cast(loc) + size; remaining -= size; usedMemory += size; return rloc; } /** * Allocates (using this pool) a generic type T. * * Params: * count = number of instances to allocate. 
* Returns: pointer (of type T*) to memory buffer */ template T *allocate(const size_t count = 1) { T *mem = static_cast(this->malloc(sizeof(T) * count)); return mem; } }; /** @} */ /** @addtogroup nanoflann_metaprog_grp Auxiliary metaprogramming stuff * @{ */ /** Used to declare fixed-size arrays when DIM>0, dynamically-allocated vectors * when DIM=-1. Fixed size version for a generic DIM: */ template struct array_or_vector_selector { typedef std::array container_t; }; /** Dynamic size version */ template struct array_or_vector_selector<-1, T> { typedef std::vector container_t; }; /** @} */ /** kd-tree base-class * * Contains the member functions common to the classes KDTreeSingleIndexAdaptor * and KDTreeSingleIndexDynamicAdaptor_. * * \tparam Derived The name of the class which inherits this class. * \tparam DatasetAdaptor The user-provided adaptor (see comments above). * \tparam Distance The distance metric to use, these are all classes derived * from nanoflann::Metric \tparam DIM Dimensionality of data points (e.g. 3 for * 3D points) \tparam IndexType Will be typically size_t or int */ template class KDTreeBaseClass { public: /** Frees the previously-built index. Automatically called within * buildIndex(). */ void freeIndex(Derived &obj) { obj.pool.free_all(); obj.root_node = NULL; obj.m_size_at_index_build = 0; } typedef typename Distance::ElementType ElementType; typedef typename Distance::DistanceType DistanceType; /*--------------------- Internal Data Structures --------------------------*/ struct Node { /** Union used because a node can be either a LEAF node or a non-leaf node, * so both data fields are never used simultaneously */ union { struct leaf { IndexType left, right; //!< Indices of points in leaf node } lr; struct nonleaf { int divfeat; //!< Dimension used for subdivision. DistanceType divlow, divhigh; //!< The values used for subdivision. 
} sub; } node_type; Node *child1, *child2; //!< Child nodes (both=NULL mean its a leaf node) }; typedef Node *NodePtr; struct Interval { ElementType low, high; }; /** * Array of indices to vectors in the dataset. */ std::vector vind; NodePtr root_node; size_t m_leaf_max_size; size_t m_size; //!< Number of current points in the dataset size_t m_size_at_index_build; //!< Number of points in the dataset when the //!< index was built int dim; //!< Dimensionality of each data point /** Define "BoundingBox" as a fixed-size or variable-size container depending * on "DIM" */ typedef typename array_or_vector_selector::container_t BoundingBox; /** Define "distance_vector_t" as a fixed-size or variable-size container * depending on "DIM" */ typedef typename array_or_vector_selector::container_t distance_vector_t; /** The KD-tree used to find neighbours */ BoundingBox root_bbox; /** * Pooled memory allocator. * * Using a pooled memory allocator is more efficient * than allocating memory directly when there is a large * number small of memory allocations. */ PooledAllocator pool; /** Returns number of points in dataset */ size_t size(const Derived &obj) const { return obj.m_size; } /** Returns the length of each point in the dataset */ size_t veclen(const Derived &obj) { return static_cast(DIM > 0 ? 
DIM : obj.dim); } /// Helper accessor to the dataset points: inline ElementType dataset_get(const Derived &obj, size_t idx, int component) const { return obj.dataset.kdtree_get_pt(idx, component); } /** * Computes the inde memory usage * Returns: memory used by the index */ size_t usedMemory(Derived &obj) { return obj.pool.usedMemory + obj.pool.wastedMemory + obj.dataset.kdtree_get_point_count() * sizeof(IndexType); // pool memory and vind array memory } void computeMinMax(const Derived &obj, IndexType *ind, IndexType count, int element, ElementType &min_elem, ElementType &max_elem) { min_elem = dataset_get(obj, ind[0], element); max_elem = dataset_get(obj, ind[0], element); for (IndexType i = 1; i < count; ++i) { ElementType val = dataset_get(obj, ind[i], element); if (val < min_elem) min_elem = val; if (val > max_elem) max_elem = val; } } /** * Create a tree node that subdivides the list of vecs from vind[first] * to vind[last]. The routine is called recursively on each sublist. * * @param left index of the first vector * @param right index of the last vector */ NodePtr divideTree(Derived &obj, const IndexType left, const IndexType right, BoundingBox &bbox) { NodePtr node = obj.pool.template allocate(); // allocate memory /* If too few exemplars remain, then make this a leaf node. */ if ((right - left) <= static_cast(obj.m_leaf_max_size)) { node->child1 = node->child2 = NULL; /* Mark as leaf node. */ node->node_type.lr.left = left; node->node_type.lr.right = right; // compute bounding-box of leaf points for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) { bbox[i].low = dataset_get(obj, obj.vind[left], i); bbox[i].high = dataset_get(obj, obj.vind[left], i); } for (IndexType k = left + 1; k < right; ++k) { for (int i = 0; i < (DIM > 0 ? 
DIM : obj.dim); ++i) { if (bbox[i].low > dataset_get(obj, obj.vind[k], i)) bbox[i].low = dataset_get(obj, obj.vind[k], i); if (bbox[i].high < dataset_get(obj, obj.vind[k], i)) bbox[i].high = dataset_get(obj, obj.vind[k], i); } } } else { IndexType idx; int cutfeat; DistanceType cutval; middleSplit_(obj, &obj.vind[0] + left, right - left, idx, cutfeat, cutval, bbox); node->node_type.sub.divfeat = cutfeat; BoundingBox left_bbox(bbox); left_bbox[cutfeat].high = cutval; node->child1 = divideTree(obj, left, left + idx, left_bbox); BoundingBox right_bbox(bbox); right_bbox[cutfeat].low = cutval; node->child2 = divideTree(obj, left + idx, right, right_bbox); node->node_type.sub.divlow = left_bbox[cutfeat].high; node->node_type.sub.divhigh = right_bbox[cutfeat].low; for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) { bbox[i].low = std::min(left_bbox[i].low, right_bbox[i].low); bbox[i].high = std::max(left_bbox[i].high, right_bbox[i].high); } } return node; } void middleSplit_(Derived &obj, IndexType *ind, IndexType count, IndexType &index, int &cutfeat, DistanceType &cutval, const BoundingBox &bbox) { const DistanceType EPS = static_cast(0.00001); ElementType max_span = bbox[0].high - bbox[0].low; for (int i = 1; i < (DIM > 0 ? DIM : obj.dim); ++i) { ElementType span = bbox[i].high - bbox[i].low; if (span > max_span) { max_span = span; } } ElementType max_spread = -1; cutfeat = 0; for (int i = 0; i < (DIM > 0 ? 
DIM : obj.dim); ++i) { ElementType span = bbox[i].high - bbox[i].low; if (span > (1 - EPS) * max_span) { ElementType min_elem, max_elem; computeMinMax(obj, ind, count, i, min_elem, max_elem); ElementType spread = max_elem - min_elem; ; if (spread > max_spread) { cutfeat = i; max_spread = spread; } } } // split in the middle DistanceType split_val = (bbox[cutfeat].low + bbox[cutfeat].high) / 2; ElementType min_elem, max_elem; computeMinMax(obj, ind, count, cutfeat, min_elem, max_elem); if (split_val < min_elem) cutval = min_elem; else if (split_val > max_elem) cutval = max_elem; else cutval = split_val; IndexType lim1, lim2; planeSplit(obj, ind, count, cutfeat, cutval, lim1, lim2); if (lim1 > count / 2) index = lim1; else if (lim2 < count / 2) index = lim2; else index = count / 2; } /** * Subdivide the list of points by a plane perpendicular on axe corresponding * to the 'cutfeat' dimension at 'cutval' position. * * On return: * dataset[ind[0..lim1-1]][cutfeat]cutval */ void planeSplit(Derived &obj, IndexType *ind, const IndexType count, int cutfeat, DistanceType &cutval, IndexType &lim1, IndexType &lim2) { /* Move vector indices for left subtree to front of list. */ IndexType left = 0; IndexType right = count - 1; for (;;) { while (left <= right && dataset_get(obj, ind[left], cutfeat) < cutval) ++left; while (right && left <= right && dataset_get(obj, ind[right], cutfeat) >= cutval) --right; if (left > right || !right) break; // "!right" was added to support unsigned Index types std::swap(ind[left], ind[right]); ++left; --right; } /* If either list is empty, it means that all remaining features * are identical. Split in the middle to maintain a balanced tree. 
*/ lim1 = left; right = count - 1; for (;;) { while (left <= right && dataset_get(obj, ind[left], cutfeat) <= cutval) ++left; while (right && left <= right && dataset_get(obj, ind[right], cutfeat) > cutval) --right; if (left > right || !right) break; // "!right" was added to support unsigned Index types std::swap(ind[left], ind[right]); ++left; --right; } lim2 = left; } DistanceType computeInitialDistances(const Derived &obj, const ElementType *vec, distance_vector_t &dists) const { assert(vec); DistanceType distsq = DistanceType(); for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) { if (vec[i] < obj.root_bbox[i].low) { dists[i] = obj.distance.accum_dist(vec[i], obj.root_bbox[i].low, i); distsq += dists[i]; } if (vec[i] > obj.root_bbox[i].high) { dists[i] = obj.distance.accum_dist(vec[i], obj.root_bbox[i].high, i); distsq += dists[i]; } } return distsq; } void save_tree(Derived &obj, FILE *stream, NodePtr tree) { save_value(stream, *tree); if (tree->child1 != NULL) { save_tree(obj, stream, tree->child1); } if (tree->child2 != NULL) { save_tree(obj, stream, tree->child2); } } void load_tree(Derived &obj, FILE *stream, NodePtr &tree) { tree = obj.pool.template allocate(); load_value(stream, *tree); if (tree->child1 != NULL) { load_tree(obj, stream, tree->child1); } if (tree->child2 != NULL) { load_tree(obj, stream, tree->child2); } } /** Stores the index in a binary file. * IMPORTANT NOTE: The set of data points is NOT stored in the file, so when * loading the index object it must be constructed associated to the same * source of data points used while building it. See the example: * examples/saveload_example.cpp \sa loadIndex */ void saveIndex_(Derived &obj, FILE *stream) { save_value(stream, obj.m_size); save_value(stream, obj.dim); save_value(stream, obj.root_bbox); save_value(stream, obj.m_leaf_max_size); save_value(stream, obj.vind); save_tree(obj, stream, obj.root_node); } /** Loads a previous index from a binary file. 
* IMPORTANT NOTE: The set of data points is NOT stored in the file, so the * index object must be constructed associated to the same source of data * points used while building the index. See the example: * examples/saveload_example.cpp \sa loadIndex */ void loadIndex_(Derived &obj, FILE *stream) { load_value(stream, obj.m_size); load_value(stream, obj.dim); load_value(stream, obj.root_bbox); load_value(stream, obj.m_leaf_max_size); load_value(stream, obj.vind); load_tree(obj, stream, obj.root_node); } }; /** @addtogroup kdtrees_grp KD-tree classes and adaptors * @{ */ /** kd-tree static index * * Contains the k-d trees and other information for indexing a set of points * for nearest-neighbor matching. * * The class "DatasetAdaptor" must provide the following interface (can be * non-virtual, inlined methods): * * \code * // Must return the number of data poins * inline size_t kdtree_get_point_count() const { ... } * * * // Must return the dim'th component of the idx'th point in the class: * inline T kdtree_get_pt(const size_t idx, const size_t dim) const { ... } * * // Optional bounding-box computation: return false to default to a standard * bbox computation loop. * // Return true if the BBOX was already computed by the class and returned * in "bb" so it can be avoided to redo it again. * // Look at bb.size() to find out the expected dimensionality (e.g. 2 or 3 * for point clouds) template bool kdtree_get_bbox(BBOX &bb) const * { * bb[0].low = ...; bb[0].high = ...; // 0th dimension limits * bb[1].low = ...; bb[1].high = ...; // 1st dimension limits * ... * return true; * } * * \endcode * * \tparam DatasetAdaptor The user-provided adaptor (see comments above). * \tparam Distance The distance metric to use: nanoflann::metric_L1, * nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. \tparam DIM * Dimensionality of data points (e.g. 
3 for 3D points) \tparam IndexType Will * be typically size_t or int */ template class KDTreeSingleIndexAdaptor : public KDTreeBaseClass< KDTreeSingleIndexAdaptor, Distance, DatasetAdaptor, DIM, IndexType> { public: /** Deleted copy constructor*/ KDTreeSingleIndexAdaptor( const KDTreeSingleIndexAdaptor &) = delete; /** * The dataset used by this index */ const DatasetAdaptor &dataset; //!< The source of our data const KDTreeSingleIndexAdaptorParams index_params; Distance distance; typedef typename nanoflann::KDTreeBaseClass< nanoflann::KDTreeSingleIndexAdaptor, Distance, DatasetAdaptor, DIM, IndexType> BaseClassRef; typedef typename BaseClassRef::ElementType ElementType; typedef typename BaseClassRef::DistanceType DistanceType; typedef typename BaseClassRef::Node Node; typedef Node *NodePtr; typedef typename BaseClassRef::Interval Interval; /** Define "BoundingBox" as a fixed-size or variable-size container depending * on "DIM" */ typedef typename BaseClassRef::BoundingBox BoundingBox; /** Define "distance_vector_t" as a fixed-size or variable-size container * depending on "DIM" */ typedef typename BaseClassRef::distance_vector_t distance_vector_t; /** * KDTree constructor * * Refer to docs in README.md or online in * https://github.com/jlblancoc/nanoflann * * The KD-Tree point dimension (the length of each point in the datase, e.g. 3 * for 3D points) is determined by means of: * - The \a DIM template parameter if >0 (highest priority) * - Otherwise, the \a dimensionality parameter of this constructor. 
* * @param inputData Dataset with the input features * @param params Basically, the maximum leaf node size */ KDTreeSingleIndexAdaptor(const int dimensionality, const DatasetAdaptor &inputData, const KDTreeSingleIndexAdaptorParams ¶ms = KDTreeSingleIndexAdaptorParams()) : dataset(inputData), index_params(params), distance(inputData) { BaseClassRef::root_node = NULL; BaseClassRef::m_size = dataset.kdtree_get_point_count(); BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; BaseClassRef::dim = dimensionality; if (DIM > 0) BaseClassRef::dim = DIM; BaseClassRef::m_leaf_max_size = params.leaf_max_size; // Create a permutable array of indices to the input vectors. init_vind(); } /** * Builds the index */ void buildIndex() { BaseClassRef::m_size = dataset.kdtree_get_point_count(); BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; init_vind(); this->freeIndex(*this); BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; if (BaseClassRef::m_size == 0) return; computeBoundingBox(BaseClassRef::root_bbox); BaseClassRef::root_node = this->divideTree(*this, 0, BaseClassRef::m_size, BaseClassRef::root_bbox); // construct the tree } /** \name Query methods * @{ */ /** * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored * inside the result object. * * Params: * result = the result object in which the indices of the * nearest-neighbors are stored vec = the vector for which to search the * nearest neighbors * * \tparam RESULTSET Should be any ResultSet * \return True if the requested neighbors could be found. 
* \sa knnSearch, radiusSearch */ template bool findNeighbors(RESULTSET &result, const ElementType *vec, const SearchParams &searchParams) const { assert(vec); if (this->size(*this) == 0) return false; if (!BaseClassRef::root_node) throw std::runtime_error( "[nanoflann] findNeighbors() called before building the index."); float epsError = 1 + searchParams.eps; distance_vector_t dists; // fixed or variable-sized container (depending on DIM) auto zero = static_cast(0); assign(dists, (DIM > 0 ? DIM : BaseClassRef::dim), zero); // Fill it with zeros. DistanceType distsq = this->computeInitialDistances(*this, vec, dists); searchLevel(result, vec, BaseClassRef::root_node, distsq, dists, epsError); // "count_leaf" parameter removed since was neither // used nor returned to the user. return result.full(); } /** * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1]. * Their indices are stored inside the result object. \sa radiusSearch, * findNeighbors \note nChecks_IGNORED is ignored but kept for compatibility * with the original FLANN interface. \return Number `N` of valid points in * the result set. Only the first `N` entries in `out_indices` and * `out_distances_sq` will be valid. Return may be less than `num_closest` * only if the number of elements in the tree is less than `num_closest`. */ size_t knnSearch(const ElementType *query_point, const size_t num_closest, IndexType *out_indices, DistanceType *out_distances_sq, const int /* nChecks_IGNORED */ = 10) const { nanoflann::KNNResultSet resultSet(num_closest); resultSet.init(out_indices, out_distances_sq); this->findNeighbors(resultSet, query_point, nanoflann::SearchParams()); return resultSet.size(); } /** * Find all the neighbors to \a query_point[0:dim-1] within a maximum radius. * The output is given as a vector of pairs, of which the first element is a * point index and the second the corresponding distance. Previous contents of * \a IndicesDists are cleared. 
* * If searchParams.sorted==true, the output list is sorted by ascending * distances. * * For a better performance, it is advisable to do a .reserve() on the vector * if you have any wild guess about the number of expected matches. * * \sa knnSearch, findNeighbors, radiusSearchCustomCallback * \return The number of points within the given radius (i.e. indices.size() * or dists.size() ) */ size_t radiusSearch(const ElementType *query_point, const DistanceType &radius, std::vector> &IndicesDists, const SearchParams &searchParams) const { RadiusResultSet resultSet(radius, IndicesDists); const size_t nFound = radiusSearchCustomCallback(query_point, resultSet, searchParams); if (searchParams.sorted) std::sort(IndicesDists.begin(), IndicesDists.end(), IndexDist_Sorter()); return nFound; } /** * Just like radiusSearch() but with a custom callback class for each point * found in the radius of the query. See the source of RadiusResultSet<> as a * start point for your own classes. \sa radiusSearch */ template size_t radiusSearchCustomCallback( const ElementType *query_point, SEARCH_CALLBACK &resultSet, const SearchParams &searchParams = SearchParams()) const { this->findNeighbors(resultSet, query_point, searchParams); return resultSet.size(); } /** @} */ public: /** Make sure the auxiliary list \a vind has the same size than the current * dataset, and re-generate if size has changed. */ void init_vind() { // Create a permutable array of indices to the input vectors. BaseClassRef::m_size = dataset.kdtree_get_point_count(); if (BaseClassRef::vind.size() != BaseClassRef::m_size) BaseClassRef::vind.resize(BaseClassRef::m_size); for (size_t i = 0; i < BaseClassRef::m_size; i++) BaseClassRef::vind[i] = i; } void computeBoundingBox(BoundingBox &bbox) { resize(bbox, (DIM > 0 ? DIM : BaseClassRef::dim)); if (dataset.kdtree_get_bbox(bbox)) { // Done! 
It was implemented in derived class } else { const size_t N = dataset.kdtree_get_point_count(); if (!N) throw std::runtime_error("[nanoflann] computeBoundingBox() called but " "no data points found."); for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) { bbox[i].low = bbox[i].high = this->dataset_get(*this, 0, i); } for (size_t k = 1; k < N; ++k) { for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) { if (this->dataset_get(*this, k, i) < bbox[i].low) bbox[i].low = this->dataset_get(*this, k, i); if (this->dataset_get(*this, k, i) > bbox[i].high) bbox[i].high = this->dataset_get(*this, k, i); } } } } /** * Performs an exact search in the tree starting from a node. * \tparam RESULTSET Should be any ResultSet * \return true if the search should be continued, false if the results are * sufficient */ template bool searchLevel(RESULTSET &result_set, const ElementType *vec, const NodePtr node, DistanceType mindistsq, distance_vector_t &dists, const float epsError) const { /* If this is a leaf node, then do check and return. */ if ((node->child1 == NULL) && (node->child2 == NULL)) { // count_leaf += (node->lr.right-node->lr.left); // Removed since was // neither used nor returned to the user. DistanceType worst_dist = result_set.worstDist(); for (IndexType i = node->node_type.lr.left; i < node->node_type.lr.right; ++i) { const IndexType index = BaseClassRef::vind[i]; // reorder... : i; DistanceType dist = distance.evalMetric( vec, index, (DIM > 0 ? DIM : BaseClassRef::dim)); if (dist < worst_dist) { if (!result_set.addPoint(dist, BaseClassRef::vind[i])) { // the resultset doesn't want to receive any more points, we're done // searching! return false; } } } return true; } /* Which child branch should be taken first? 
*/ int idx = node->node_type.sub.divfeat; ElementType val = vec[idx]; DistanceType diff1 = val - node->node_type.sub.divlow; DistanceType diff2 = val - node->node_type.sub.divhigh; NodePtr bestChild; NodePtr otherChild; DistanceType cut_dist; if ((diff1 + diff2) < 0) { bestChild = node->child1; otherChild = node->child2; cut_dist = distance.accum_dist(val, node->node_type.sub.divhigh, idx); } else { bestChild = node->child2; otherChild = node->child1; cut_dist = distance.accum_dist(val, node->node_type.sub.divlow, idx); } /* Call recursively to search next level down. */ if (!searchLevel(result_set, vec, bestChild, mindistsq, dists, epsError)) { // the resultset doesn't want to receive any more points, we're done // searching! return false; } DistanceType dst = dists[idx]; mindistsq = mindistsq + cut_dist - dst; dists[idx] = cut_dist; if (mindistsq * epsError <= result_set.worstDist()) { if (!searchLevel(result_set, vec, otherChild, mindistsq, dists, epsError)) { // the resultset doesn't want to receive any more points, we're done // searching! return false; } } dists[idx] = dst; return true; } public: /** Stores the index in a binary file. * IMPORTANT NOTE: The set of data points is NOT stored in the file, so when * loading the index object it must be constructed associated to the same * source of data points used while building it. See the example: * examples/saveload_example.cpp \sa loadIndex */ void saveIndex(FILE *stream) { this->saveIndex_(*this, stream); } /** Loads a previous index from a binary file. * IMPORTANT NOTE: The set of data points is NOT stored in the file, so the * index object must be constructed associated to the same source of data * points used while building the index. 
See the example: * examples/saveload_example.cpp \sa loadIndex */ void loadIndex(FILE *stream) { this->loadIndex_(*this, stream); } }; // class KDTree /** kd-tree dynamic index * * Contains the k-d trees and other information for indexing a set of points * for nearest-neighbor matching. * * The class "DatasetAdaptor" must provide the following interface (can be * non-virtual, inlined methods): * * \code * // Must return the number of data poins * inline size_t kdtree_get_point_count() const { ... } * * // Must return the dim'th component of the idx'th point in the class: * inline T kdtree_get_pt(const size_t idx, const size_t dim) const { ... } * * // Optional bounding-box computation: return false to default to a standard * bbox computation loop. * // Return true if the BBOX was already computed by the class and returned * in "bb" so it can be avoided to redo it again. * // Look at bb.size() to find out the expected dimensionality (e.g. 2 or 3 * for point clouds) template bool kdtree_get_bbox(BBOX &bb) const * { * bb[0].low = ...; bb[0].high = ...; // 0th dimension limits * bb[1].low = ...; bb[1].high = ...; // 1st dimension limits * ... * return true; * } * * \endcode * * \tparam DatasetAdaptor The user-provided adaptor (see comments above). * \tparam Distance The distance metric to use: nanoflann::metric_L1, * nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. \tparam DIM * Dimensionality of data points (e.g. 
3 for 3D points) \tparam IndexType Will * be typically size_t or int */ template class KDTreeSingleIndexDynamicAdaptor_ : public KDTreeBaseClass, Distance, DatasetAdaptor, DIM, IndexType> { public: /** * The dataset used by this index */ const DatasetAdaptor &dataset; //!< The source of our data KDTreeSingleIndexAdaptorParams index_params; std::vector &treeIndex; Distance distance; typedef typename nanoflann::KDTreeBaseClass< nanoflann::KDTreeSingleIndexDynamicAdaptor_, Distance, DatasetAdaptor, DIM, IndexType> BaseClassRef; typedef typename BaseClassRef::ElementType ElementType; typedef typename BaseClassRef::DistanceType DistanceType; typedef typename BaseClassRef::Node Node; typedef Node *NodePtr; typedef typename BaseClassRef::Interval Interval; /** Define "BoundingBox" as a fixed-size or variable-size container depending * on "DIM" */ typedef typename BaseClassRef::BoundingBox BoundingBox; /** Define "distance_vector_t" as a fixed-size or variable-size container * depending on "DIM" */ typedef typename BaseClassRef::distance_vector_t distance_vector_t; /** * KDTree constructor * * Refer to docs in README.md or online in * https://github.com/jlblancoc/nanoflann * * The KD-Tree point dimension (the length of each point in the datase, e.g. 3 * for 3D points) is determined by means of: * - The \a DIM template parameter if >0 (highest priority) * - Otherwise, the \a dimensionality parameter of this constructor. 
* * @param inputData Dataset with the input features * @param params Basically, the maximum leaf node size */ KDTreeSingleIndexDynamicAdaptor_( const int dimensionality, const DatasetAdaptor &inputData, std::vector &treeIndex_, const KDTreeSingleIndexAdaptorParams ¶ms = KDTreeSingleIndexAdaptorParams()) : dataset(inputData), index_params(params), treeIndex(treeIndex_), distance(inputData) { BaseClassRef::root_node = NULL; BaseClassRef::m_size = 0; BaseClassRef::m_size_at_index_build = 0; BaseClassRef::dim = dimensionality; if (DIM > 0) BaseClassRef::dim = DIM; BaseClassRef::m_leaf_max_size = params.leaf_max_size; } /** Assignment operator definiton */ KDTreeSingleIndexDynamicAdaptor_ operator=(const KDTreeSingleIndexDynamicAdaptor_ &rhs) { KDTreeSingleIndexDynamicAdaptor_ tmp(rhs); std::swap(BaseClassRef::vind, tmp.BaseClassRef::vind); std::swap(BaseClassRef::m_leaf_max_size, tmp.BaseClassRef::m_leaf_max_size); std::swap(index_params, tmp.index_params); std::swap(treeIndex, tmp.treeIndex); std::swap(BaseClassRef::m_size, tmp.BaseClassRef::m_size); std::swap(BaseClassRef::m_size_at_index_build, tmp.BaseClassRef::m_size_at_index_build); std::swap(BaseClassRef::root_node, tmp.BaseClassRef::root_node); std::swap(BaseClassRef::root_bbox, tmp.BaseClassRef::root_bbox); std::swap(BaseClassRef::pool, tmp.BaseClassRef::pool); return *this; } /** * Builds the index */ void buildIndex() { BaseClassRef::m_size = BaseClassRef::vind.size(); this->freeIndex(*this); BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; if (BaseClassRef::m_size == 0) return; computeBoundingBox(BaseClassRef::root_bbox); BaseClassRef::root_node = this->divideTree(*this, 0, BaseClassRef::m_size, BaseClassRef::root_bbox); // construct the tree } /** \name Query methods * @{ */ /** * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored * inside the result object. 
* * Params: * result = the result object in which the indices of the * nearest-neighbors are stored vec = the vector for which to search the * nearest neighbors * * \tparam RESULTSET Should be any ResultSet * \return True if the requested neighbors could be found. * \sa knnSearch, radiusSearch */ template bool findNeighbors(RESULTSET &result, const ElementType *vec, const SearchParams &searchParams) const { assert(vec); if (this->size(*this) == 0) return false; if (!BaseClassRef::root_node) return false; float epsError = 1 + searchParams.eps; // fixed or variable-sized container (depending on DIM) distance_vector_t dists; // Fill it with zeros. assign(dists, (DIM > 0 ? DIM : BaseClassRef::dim), static_cast(0)); DistanceType distsq = this->computeInitialDistances(*this, vec, dists); searchLevel(result, vec, BaseClassRef::root_node, distsq, dists, epsError); // "count_leaf" parameter removed since was neither // used nor returned to the user. return result.full(); } /** * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1]. * Their indices are stored inside the result object. \sa radiusSearch, * findNeighbors \note nChecks_IGNORED is ignored but kept for compatibility * with the original FLANN interface. \return Number `N` of valid points in * the result set. Only the first `N` entries in `out_indices` and * `out_distances_sq` will be valid. Return may be less than `num_closest` * only if the number of elements in the tree is less than `num_closest`. */ size_t knnSearch(const ElementType *query_point, const size_t num_closest, IndexType *out_indices, DistanceType *out_distances_sq, const int /* nChecks_IGNORED */ = 10) const { nanoflann::KNNResultSet resultSet(num_closest); resultSet.init(out_indices, out_distances_sq); this->findNeighbors(resultSet, query_point, nanoflann::SearchParams()); return resultSet.size(); } /** * Find all the neighbors to \a query_point[0:dim-1] within a maximum radius. 
* The output is given as a vector of pairs, of which the first element is a * point index and the second the corresponding distance. Previous contents of * \a IndicesDists are cleared. * * If searchParams.sorted==true, the output list is sorted by ascending * distances. * * For a better performance, it is advisable to do a .reserve() on the vector * if you have any wild guess about the number of expected matches. * * \sa knnSearch, findNeighbors, radiusSearchCustomCallback * \return The number of points within the given radius (i.e. indices.size() * or dists.size() ) */ size_t radiusSearch(const ElementType *query_point, const DistanceType &radius, std::vector> &IndicesDists, const SearchParams &searchParams) const { RadiusResultSet resultSet(radius, IndicesDists); const size_t nFound = radiusSearchCustomCallback(query_point, resultSet, searchParams); if (searchParams.sorted) std::sort(IndicesDists.begin(), IndicesDists.end(), IndexDist_Sorter()); return nFound; } /** * Just like radiusSearch() but with a custom callback class for each point * found in the radius of the query. See the source of RadiusResultSet<> as a * start point for your own classes. \sa radiusSearch */ template size_t radiusSearchCustomCallback( const ElementType *query_point, SEARCH_CALLBACK &resultSet, const SearchParams &searchParams = SearchParams()) const { this->findNeighbors(resultSet, query_point, searchParams); return resultSet.size(); } /** @} */ public: void computeBoundingBox(BoundingBox &bbox) { resize(bbox, (DIM > 0 ? DIM : BaseClassRef::dim)); if (dataset.kdtree_get_bbox(bbox)) { // Done! It was implemented in derived class } else { const size_t N = BaseClassRef::m_size; if (!N) throw std::runtime_error("[nanoflann] computeBoundingBox() called but " "no data points found."); for (int i = 0; i < (DIM > 0 ? 
DIM : BaseClassRef::dim); ++i) { bbox[i].low = bbox[i].high = this->dataset_get(*this, BaseClassRef::vind[0], i); } for (size_t k = 1; k < N; ++k) { for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) { if (this->dataset_get(*this, BaseClassRef::vind[k], i) < bbox[i].low) bbox[i].low = this->dataset_get(*this, BaseClassRef::vind[k], i); if (this->dataset_get(*this, BaseClassRef::vind[k], i) > bbox[i].high) bbox[i].high = this->dataset_get(*this, BaseClassRef::vind[k], i); } } } } /** * Performs an exact search in the tree starting from a node. * \tparam RESULTSET Should be any ResultSet */ template void searchLevel(RESULTSET &result_set, const ElementType *vec, const NodePtr node, DistanceType mindistsq, distance_vector_t &dists, const float epsError) const { /* If this is a leaf node, then do check and return. */ if ((node->child1 == NULL) && (node->child2 == NULL)) { // count_leaf += (node->lr.right-node->lr.left); // Removed since was // neither used nor returned to the user. DistanceType worst_dist = result_set.worstDist(); for (IndexType i = node->node_type.lr.left; i < node->node_type.lr.right; ++i) { const IndexType index = BaseClassRef::vind[i]; // reorder... : i; if (treeIndex[index] == -1) continue; DistanceType dist = distance.evalMetric( vec, index, (DIM > 0 ? DIM : BaseClassRef::dim)); if (dist < worst_dist) { if (!result_set.addPoint( static_cast(dist), static_cast( BaseClassRef::vind[i]))) { // the resultset doesn't want to receive any more points, we're done // searching! return; // false; } } } return; } /* Which child branch should be taken first? 
*/ int idx = node->node_type.sub.divfeat; ElementType val = vec[idx]; DistanceType diff1 = val - node->node_type.sub.divlow; DistanceType diff2 = val - node->node_type.sub.divhigh; NodePtr bestChild; NodePtr otherChild; DistanceType cut_dist; if ((diff1 + diff2) < 0) { bestChild = node->child1; otherChild = node->child2; cut_dist = distance.accum_dist(val, node->node_type.sub.divhigh, idx); } else { bestChild = node->child2; otherChild = node->child1; cut_dist = distance.accum_dist(val, node->node_type.sub.divlow, idx); } /* Call recursively to search next level down. */ searchLevel(result_set, vec, bestChild, mindistsq, dists, epsError); DistanceType dst = dists[idx]; mindistsq = mindistsq + cut_dist - dst; dists[idx] = cut_dist; if (mindistsq * epsError <= result_set.worstDist()) { searchLevel(result_set, vec, otherChild, mindistsq, dists, epsError); } dists[idx] = dst; } public: /** Stores the index in a binary file. * IMPORTANT NOTE: The set of data points is NOT stored in the file, so when * loading the index object it must be constructed associated to the same * source of data points used while building it. See the example: * examples/saveload_example.cpp \sa loadIndex */ void saveIndex(FILE *stream) { this->saveIndex_(*this, stream); } /** Loads a previous index from a binary file. * IMPORTANT NOTE: The set of data points is NOT stored in the file, so the * index object must be constructed associated to the same source of data * points used while building the index. See the example: * examples/saveload_example.cpp \sa loadIndex */ void loadIndex(FILE *stream) { this->loadIndex_(*this, stream); } }; /** kd-tree dynaimic index * * class to create multiple static index and merge their results to behave as * single dynamic index as proposed in Logarithmic Approach. * * Example of usage: * examples/dynamic_pointcloud_example.cpp * * \tparam DatasetAdaptor The user-provided adaptor (see comments above). 
* \tparam Distance The distance metric to use: nanoflann::metric_L1, * nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. \tparam DIM * Dimensionality of data points (e.g. 3 for 3D points) \tparam IndexType Will * be typically size_t or int */ template class KDTreeSingleIndexDynamicAdaptor { public: typedef typename Distance::ElementType ElementType; typedef typename Distance::DistanceType DistanceType; protected: size_t m_leaf_max_size; size_t treeCount; size_t pointCount; /** * The dataset used by this index */ const DatasetAdaptor &dataset; //!< The source of our data std::vector treeIndex; //!< treeIndex[idx] is the index of tree in which //!< point at idx is stored. treeIndex[idx]=-1 //!< means that point has been removed. KDTreeSingleIndexAdaptorParams index_params; int dim; //!< Dimensionality of each data point typedef KDTreeSingleIndexDynamicAdaptor_ index_container_t; std::vector index; public: /** Get a const ref to the internal list of indices; the number of indices is * adapted dynamically as the dataset grows in size. */ const std::vector &getAllIndices() const { return index; } private: /** finds position of least significant unset bit */ int First0Bit(IndexType num) { int pos = 0; while (num & 1) { num = num >> 1; pos++; } return pos; } /** Creates multiple empty trees to handle dynamic support */ void init() { typedef KDTreeSingleIndexDynamicAdaptor_ my_kd_tree_t; std::vector index_( treeCount, my_kd_tree_t(dim /*dim*/, dataset, treeIndex, index_params)); index = index_; } public: Distance distance; /** * KDTree constructor * * Refer to docs in README.md or online in * https://github.com/jlblancoc/nanoflann * * The KD-Tree point dimension (the length of each point in the datase, e.g. 3 * for 3D points) is determined by means of: * - The \a DIM template parameter if >0 (highest priority) * - Otherwise, the \a dimensionality parameter of this constructor. 
* * @param inputData Dataset with the input features * @param params Basically, the maximum leaf node size */ KDTreeSingleIndexDynamicAdaptor(const int dimensionality, const DatasetAdaptor &inputData, const KDTreeSingleIndexAdaptorParams ¶ms = KDTreeSingleIndexAdaptorParams(), const size_t maximumPointCount = 1000000000U) : dataset(inputData), index_params(params), distance(inputData) { treeCount = static_cast(std::log2(maximumPointCount)); pointCount = 0U; dim = dimensionality; treeIndex.clear(); if (DIM > 0) dim = DIM; m_leaf_max_size = params.leaf_max_size; init(); const size_t num_initial_points = dataset.kdtree_get_point_count(); if (num_initial_points > 0) { addPoints(0, num_initial_points - 1); } } /** Deleted copy constructor*/ KDTreeSingleIndexDynamicAdaptor( const KDTreeSingleIndexDynamicAdaptor &) = delete; /** Add points to the set, Inserts all points from [start, end] */ void addPoints(IndexType start, IndexType end) { size_t count = end - start + 1; treeIndex.resize(treeIndex.size() + count); for (IndexType idx = start; idx <= end; idx++) { int pos = First0Bit(pointCount); index[pos].vind.clear(); treeIndex[pointCount] = pos; for (int i = 0; i < pos; i++) { for (int j = 0; j < static_cast(index[i].vind.size()); j++) { index[pos].vind.push_back(index[i].vind[j]); if (treeIndex[index[i].vind[j]] != -1) treeIndex[index[i].vind[j]] = pos; } index[i].vind.clear(); index[i].freeIndex(index[i]); } index[pos].vind.push_back(idx); index[pos].buildIndex(); pointCount++; } } /** Remove a point from the set (Lazy Deletion) */ void removePoint(size_t idx) { if (idx >= pointCount) return; treeIndex[idx] = -1; } /** * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored * inside the result object. 
* * Params: * result = the result object in which the indices of the * nearest-neighbors are stored vec = the vector for which to search the * nearest neighbors * * \tparam RESULTSET Should be any ResultSet * \return True if the requested neighbors could be found. * \sa knnSearch, radiusSearch */ template bool findNeighbors(RESULTSET &result, const ElementType *vec, const SearchParams &searchParams) const { for (size_t i = 0; i < treeCount; i++) { index[i].findNeighbors(result, &vec[0], searchParams); } return result.full(); } }; /** An L2-metric KD-tree adaptor for working with data directly stored in an * Eigen Matrix, without duplicating the data storage. You can select whether a * row or column in the matrix represents a point in the state space. * * Example of usage: * \code * Eigen::Matrix mat; * // Fill out "mat"... * * typedef KDTreeEigenMatrixAdaptor< Eigen::Matrix > * my_kd_tree_t; const int max_leaf = 10; my_kd_tree_t mat_index(mat, max_leaf * ); mat_index.index->buildIndex(); mat_index.index->... \endcode * * \tparam DIM If set to >0, it specifies a compile-time fixed dimensionality * for the points in the data set, allowing more compiler optimizations. \tparam * Distance The distance metric to use: nanoflann::metric_L1, * nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. \tparam row_major * If set to true the rows of the matrix are used as the points, if set to false * the columns of the matrix are used as the points. */ template struct KDTreeEigenMatrixAdaptor { typedef KDTreeEigenMatrixAdaptor self_t; typedef typename MatrixType::Scalar num_t; typedef typename MatrixType::Index IndexType; typedef typename Distance::template traits::distance_t metric_t; typedef KDTreeSingleIndexAdaptor index_t; index_t *index; //! The kd-tree index for the user to call its methods as //! usual with any other FLANN index. 
/// Constructor: takes a const ref to the matrix object with the data points KDTreeEigenMatrixAdaptor(const size_t dimensionality, const std::reference_wrapper &mat, const int leaf_max_size = 10) : m_data_matrix(mat) { const auto dims = row_major ? mat.get().cols() : mat.get().rows(); if (size_t(dims) != dimensionality) throw std::runtime_error( "Error: 'dimensionality' must match column count in data matrix"); if (DIM > 0 && int(dims) != DIM) throw std::runtime_error( "Data set dimensionality does not match the 'DIM' template argument"); index = new index_t(static_cast(dims), *this /* adaptor */, nanoflann::KDTreeSingleIndexAdaptorParams(leaf_max_size)); index->buildIndex(); } public: /** Deleted copy constructor */ KDTreeEigenMatrixAdaptor(const self_t &) = delete; ~KDTreeEigenMatrixAdaptor() { delete index; } const std::reference_wrapper m_data_matrix; /** Query for the \a num_closest closest points to a given point (entered as * query_point[0:dim-1]). Note that this is a short-cut method for * index->findNeighbors(). The user can also call index->... methods as * desired. \note nChecks_IGNORED is ignored but kept for compatibility with * the original FLANN interface. 
*/ inline void query(const num_t *query_point, const size_t num_closest, IndexType *out_indices, num_t *out_distances_sq, const int /* nChecks_IGNORED */ = 10) const { nanoflann::KNNResultSet resultSet(num_closest); resultSet.init(out_indices, out_distances_sq); index->findNeighbors(resultSet, query_point, nanoflann::SearchParams()); } /** @name Interface expected by KDTreeSingleIndexAdaptor * @{ */ const self_t &derived() const { return *this; } self_t &derived() { return *this; } // Must return the number of data points inline size_t kdtree_get_point_count() const { if (row_major) return m_data_matrix.get().rows(); else return m_data_matrix.get().cols(); } // Returns the dim'th component of the idx'th point in the class: inline num_t kdtree_get_pt(const IndexType idx, size_t dim) const { if (row_major) return m_data_matrix.get().coeff(idx, IndexType(dim)); else return m_data_matrix.get().coeff(IndexType(dim), idx); } // Optional bounding-box computation: return false to default to a standard // bbox computation loop. // Return true if the BBOX was already computed by the class and returned in // "bb" so it can be avoided to redo it again. Look at bb.size() to find out // the expected dimensionality (e.g. 
2 or 3 for point clouds) template bool kdtree_get_bbox(BBOX & /*bb*/) const { return false; } /** @} */ }; // end of KDTreeEigenMatrixAdaptor /** @} */ /** @} */ // end of grouping } // namespace nanoflann #endif /* NANOFLANN_HPP_ */ pytorch_cluster-1.6.3/csrc/cuda/000077500000000000000000000000001451151623700166255ustar00rootroot00000000000000pytorch_cluster-1.6.3/csrc/cuda/fps_cuda.cu000066400000000000000000000064351451151623700207520ustar00rootroot00000000000000#include "fps_cuda.h" #include #include "utils.cuh" #define THREADS 256 template __global__ void fps_kernel(const scalar_t *src, const int64_t *ptr, const int64_t *out_ptr, const int64_t *start, scalar_t *dist, int64_t *out, int64_t dim) { const int64_t thread_idx = threadIdx.x; const int64_t batch_idx = blockIdx.x; const int64_t start_idx = ptr[batch_idx]; const int64_t end_idx = ptr[batch_idx + 1]; __shared__ scalar_t best_dist[THREADS]; __shared__ int64_t best_dist_idx[THREADS]; if (thread_idx == 0) { out[out_ptr[batch_idx]] = start_idx + start[batch_idx]; } for (int64_t m = out_ptr[batch_idx] + 1; m < out_ptr[batch_idx + 1]; m++) { __syncthreads(); int64_t old = out[m - 1]; scalar_t best = (scalar_t)-1.; int64_t best_idx = 0; for (int64_t n = start_idx + thread_idx; n < end_idx; n += THREADS) { scalar_t tmp, dd = (scalar_t)0.; for (int64_t d = 0; d < dim; d++) { tmp = src[dim * old + d] - src[dim * n + d]; dd += tmp * tmp; } dd = min(dist[n], dd); dist[n] = dd; if (dd > best) { best = dd; best_idx = n; } } best_dist[thread_idx] = best; best_dist_idx[thread_idx] = best_idx; for (int64_t i = 1; i < THREADS; i *= 2) { __syncthreads(); if ((thread_idx + i) < THREADS && best_dist[thread_idx] < best_dist[thread_idx + i]) { best_dist[thread_idx] = best_dist[thread_idx + i]; best_dist_idx[thread_idx] = best_dist_idx[thread_idx + i]; } } __syncthreads(); if (thread_idx == 0) { out[m] = best_dist_idx[0]; } } } torch::Tensor fps_cuda(torch::Tensor src, torch::Tensor ptr, torch::Tensor ratio, bool random_start) { 
CHECK_CUDA(src); CHECK_CUDA(ptr); CHECK_CUDA(ratio); CHECK_INPUT(ptr.dim() == 1); cudaSetDevice(src.get_device()); src = src.view({src.size(0), -1}).contiguous(); ptr = ptr.contiguous(); auto batch_size = ptr.numel() - 1; auto deg = ptr.narrow(0, 1, batch_size) - ptr.narrow(0, 0, batch_size); auto out_ptr = deg.toType(ratio.scalar_type()) * ratio; out_ptr = out_ptr.ceil().toType(torch::kLong).cumsum(0); out_ptr = torch::cat({torch::zeros({1}, ptr.options()), out_ptr}, 0); torch::Tensor start; if (random_start) { start = torch::rand(batch_size, src.options()); start = (start * deg.toType(ratio.scalar_type())).toType(torch::kLong); } else { start = torch::zeros({batch_size}, ptr.options()); } auto dist = torch::full(src.size(0), 5e4, src.options()); auto out_size = (int64_t *)malloc(sizeof(int64_t)); cudaMemcpy(out_size, out_ptr[-1].data_ptr(), sizeof(int64_t), cudaMemcpyDeviceToHost); auto out = torch::empty({out_size[0]}, out_ptr.options()); auto stream = at::cuda::getCurrentCUDAStream(); auto scalar_type = src.scalar_type(); AT_DISPATCH_FLOATING_TYPES_AND(at::ScalarType::Half, scalar_type, "_", [&] { fps_kernel<<>>( src.data_ptr(), ptr.data_ptr(), out_ptr.data_ptr(), start.data_ptr(), dist.data_ptr(), out.data_ptr(), src.size(1)); }); return out; } pytorch_cluster-1.6.3/csrc/cuda/fps_cuda.h000066400000000000000000000002471451151623700205650ustar00rootroot00000000000000#pragma once #include "../extensions.h" torch::Tensor fps_cuda(torch::Tensor src, torch::Tensor ptr, torch::Tensor ratio, bool random_start); pytorch_cluster-1.6.3/csrc/cuda/graclus_cuda.cu000066400000000000000000000170711451151623700216200ustar00rootroot00000000000000#include "graclus_cuda.h" #include #include "utils.cuh" #define THREADS 1024 #define BLOCKS(N) (N + THREADS - 1) / THREADS #define BLUE_P 0.53406 __device__ bool done_d; __global__ void init_done_kernel() { done_d = true; } __global__ void colorize_kernel(int64_t *out, const float *bernoulli, int64_t numel) { const int64_t thread_idx = 
blockIdx.x * blockDim.x + threadIdx.x; if (thread_idx < numel) { if (out[thread_idx] < 0) { out[thread_idx] = (int64_t)bernoulli[thread_idx] - 2; done_d = false; } } } bool colorize(torch::Tensor out) { auto stream = at::cuda::getCurrentCUDAStream(); init_done_kernel<<<1, 1, 0, stream>>>(); auto numel = out.size(0); auto props = torch::full(numel, BLUE_P, out.options().dtype(torch::kFloat)); auto bernoulli = props.bernoulli(); colorize_kernel<<>>( out.data_ptr(), bernoulli.data_ptr(), numel); bool done_h; cudaMemcpyFromSymbol(&done_h, done_d, sizeof(done_h), 0, cudaMemcpyDeviceToHost); return done_h; } __global__ void propose_kernel(int64_t *out, int64_t *proposal, const int64_t *rowptr, const int64_t *col, int64_t numel) { const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x; if (thread_idx < numel) { if (out[thread_idx] != -1) return; // Only vist blue nodes. bool has_unmatched_neighbor = false; for (int64_t i = rowptr[thread_idx]; i < rowptr[thread_idx + 1]; i++) { auto v = col[i]; if (out[v] < 0) has_unmatched_neighbor = true; // Unmatched neighbor found. if (out[v] == -2) { proposal[thread_idx] = v; // Propose to first red neighbor. break; } } if (!has_unmatched_neighbor) out[thread_idx] = thread_idx; } } template __global__ void weighted_propose_kernel(int64_t *out, int64_t *proposal, const int64_t *rowptr, const int64_t *col, const scalar_t *weight, int64_t numel) { const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x; if (thread_idx < numel) { if (out[thread_idx] != -1) return; // Only vist blue nodes. bool has_unmatched_neighbor = false; int64_t v_max = -1; scalar_t w_max = 0; for (int64_t i = rowptr[thread_idx]; i < rowptr[thread_idx + 1]; i++) { auto v = col[i]; if (out[v] < 0) has_unmatched_neighbor = true; // Unmatched neighbor found. // Find maximum weighted red neighbor. if (out[v] == -2 && weight[i] >= w_max) { v_max = v; w_max = weight[i]; } } proposal[thread_idx] = v_max; // Propose. 
if (!has_unmatched_neighbor) out[thread_idx] = thread_idx; } } void propose(torch::Tensor out, torch::Tensor proposal, torch::Tensor rowptr, torch::Tensor col, torch::optional optional_weight) { auto stream = at::cuda::getCurrentCUDAStream(); if (!optional_weight.has_value()) { propose_kernel<<>>( out.data_ptr(), proposal.data_ptr(), rowptr.data_ptr(), col.data_ptr(), out.numel()); } else { auto weight = optional_weight.value(); auto scalar_type = weight.scalar_type(); AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Half, scalar_type, "_", [&] { weighted_propose_kernel <<>>( out.data_ptr(), proposal.data_ptr(), rowptr.data_ptr(), col.data_ptr(), weight.data_ptr(), out.numel()); }); } } __global__ void respond_kernel(int64_t *out, const int64_t *proposal, const int64_t *rowptr, const int64_t *col, int64_t numel) { const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x; if (thread_idx < numel) { if (out[thread_idx] != -2) return; // Only vist red nodes. bool has_unmatched_neighbor = false; for (int64_t i = rowptr[thread_idx]; i < rowptr[thread_idx + 1]; i++) { auto v = col[i]; if (out[v] < 0) has_unmatched_neighbor = true; // Unmatched neighbor found. if (out[v] == -1 && proposal[v] == thread_idx) { // Match first blue neighbhor v which proposed to u. out[thread_idx] = min(thread_idx, v); out[v] = min(thread_idx, v); break; } } if (!has_unmatched_neighbor) out[thread_idx] = thread_idx; } } template __global__ void weighted_respond_kernel(int64_t *out, const int64_t *proposal, const int64_t *rowptr, const int64_t *col, const scalar_t *weight, int64_t numel) { const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x; if (thread_idx < numel) { if (out[thread_idx] != -2) return; // Only vist red nodes. bool has_unmatched_neighbor = false; int64_t v_max = -1; scalar_t w_max = 0; for (int64_t i = rowptr[thread_idx]; i < rowptr[thread_idx + 1]; i++) { auto v = col[i]; if (out[v] < 0) has_unmatched_neighbor = true; // Unmatched neighbor found. 
if (out[v] == -1 && proposal[v] == thread_idx && weight[i] >= w_max) { // Find maximum weighted blue neighbhor v which proposed to u. v_max = v; w_max = weight[i]; } } if (v_max >= 0) { out[thread_idx] = min(thread_idx, v_max); // Match neighbors. out[v_max] = min(thread_idx, v_max); } if (!has_unmatched_neighbor) out[thread_idx] = thread_idx; } } void respond(torch::Tensor out, torch::Tensor proposal, torch::Tensor rowptr, torch::Tensor col, torch::optional optional_weight) { auto stream = at::cuda::getCurrentCUDAStream(); if (!optional_weight.has_value()) { respond_kernel<<>>( out.data_ptr(), proposal.data_ptr(), rowptr.data_ptr(), col.data_ptr(), out.numel()); } else { auto weight = optional_weight.value(); auto scalar_type = weight.scalar_type(); AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Half, scalar_type, "_", [&] { weighted_respond_kernel <<>>( out.data_ptr(), proposal.data_ptr(), rowptr.data_ptr(), col.data_ptr(), weight.data_ptr(), out.numel()); }); } } torch::Tensor graclus_cuda(torch::Tensor rowptr, torch::Tensor col, torch::optional optional_weight) { CHECK_CUDA(rowptr); CHECK_CUDA(col); CHECK_INPUT(rowptr.dim() == 1 && col.dim() == 1); if (optional_weight.has_value()) { CHECK_CUDA(optional_weight.value()); CHECK_INPUT(optional_weight.value().dim() == 1); CHECK_INPUT(optional_weight.value().numel() == col.numel()); } cudaSetDevice(rowptr.get_device()); int64_t num_nodes = rowptr.numel() - 1; auto out = torch::full(num_nodes, -1, rowptr.options()); auto proposal = torch::full(num_nodes, -1, rowptr.options()); while (!colorize(out)) { propose(out, proposal, rowptr, col, optional_weight); respond(out, proposal, rowptr, col, optional_weight); } return out; } pytorch_cluster-1.6.3/csrc/cuda/graclus_cuda.h000066400000000000000000000002721451151623700214330ustar00rootroot00000000000000#pragma once #include "../extensions.h" torch::Tensor graclus_cuda(torch::Tensor rowptr, torch::Tensor col, torch::optional optional_weight); 
pytorch_cluster-1.6.3/csrc/cuda/grid_cuda.cu000066400000000000000000000043561451151623700211070ustar00rootroot00000000000000#include "grid_cuda.h" #include #include "utils.cuh" #define THREADS 1024 #define BLOCKS(N) (N + THREADS - 1) / THREADS template __global__ void grid_kernel(const scalar_t *pos, const scalar_t *size, const scalar_t *start, const scalar_t *end, int64_t *out, int64_t D, int64_t numel) { const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x; if (thread_idx < numel) { int64_t c = 0, k = 1; for (int64_t d = 0; d < D; d++) { scalar_t p = pos[thread_idx * D + d] - start[d]; c += (int64_t)(p / size[d]) * k; k *= (int64_t)((end[d] - start[d]) / size[d]) + 1; } out[thread_idx] = c; } } torch::Tensor grid_cuda(torch::Tensor pos, torch::Tensor size, torch::optional optional_start, torch::optional optional_end) { CHECK_CUDA(pos); CHECK_CUDA(size); cudaSetDevice(pos.get_device()); if (optional_start.has_value()) CHECK_CUDA(optional_start.value()); if (optional_start.has_value()) CHECK_CUDA(optional_start.value()); pos = pos.view({pos.size(0), -1}).contiguous(); size = size.contiguous(); CHECK_INPUT(size.numel() == pos.size(1)); if (!optional_start.has_value()) optional_start = std::get<0>(pos.min(0)); else { optional_start = optional_start.value().contiguous(); CHECK_INPUT(optional_start.value().numel() == pos.size(1)); } if (!optional_end.has_value()) optional_end = std::get<0>(pos.max(0)); else { optional_start = optional_start.value().contiguous(); CHECK_INPUT(optional_start.value().numel() == pos.size(1)); } auto start = optional_start.value(); auto end = optional_end.value(); auto out = torch::empty({pos.size(0)}, pos.options().dtype(torch::kLong)); auto stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Half, pos.scalar_type(), "_", [&] { grid_kernel<<>>( pos.data_ptr(), size.data_ptr(), start.data_ptr(), end.data_ptr(), out.data_ptr(), pos.size(1), out.numel()); }); return out; } 
pytorch_cluster-1.6.3/csrc/cuda/grid_cuda.h000066400000000000000000000003661451151623700207240ustar00rootroot00000000000000#pragma once #include "../extensions.h" torch::Tensor grid_cuda(torch::Tensor pos, torch::Tensor size, torch::optional optional_start, torch::optional optional_end); pytorch_cluster-1.6.3/csrc/cuda/knn_cuda.cu000066400000000000000000000103171451151623700207420ustar00rootroot00000000000000#include "radius_cuda.h" #include #include "utils.cuh" #define THREADS 256 template struct Cosine { static inline __device__ scalar_t dot(const scalar_t *a, const scalar_t *b, int64_t n_a, int64_t n_b, int64_t size) { scalar_t result = 0; for (int64_t i = 0; i < size; i++) { result += a[n_a * size + i] * b[n_b * size + i]; } return result; } static inline __device__ scalar_t norm(const scalar_t *a, int64_t n_a, int64_t size) { scalar_t result = 0; for (int64_t i = 0; i < size; i++) { result += a[n_a * size + i] * a[n_a * size + i]; } return sqrt(result); } }; template __global__ void knn_kernel(const scalar_t *__restrict__ x, const scalar_t *__restrict__ y, const int64_t *__restrict__ ptr_x, const int64_t *__restrict__ ptr_y, int64_t *__restrict__ row, int64_t *__restrict__ col, const int64_t k, const int64_t n, const int64_t m, const int64_t dim, const int64_t num_examples, const bool cosine) { const int64_t n_y = blockIdx.x * blockDim.x + threadIdx.x; if (n_y >= m) return; const int64_t example_idx = get_example_idx(n_y, ptr_y, num_examples); scalar_t best_dist[100]; int64_t best_idx[100]; for (int e = 0; e < k; e++) { best_dist[e] = 1e10; best_idx[e] = -1; } for (int64_t n_x = ptr_x[example_idx]; n_x < ptr_x[example_idx + 1]; n_x++) { scalar_t tmp_dist = 0; if (cosine) { tmp_dist = Cosine::dot(x, y, n_x, n_y, dim) / (Cosine::norm(x, n_x, dim) * Cosine::norm(y, n_y, dim)); tmp_dist = 1. 
- tmp_dist; } else { for (int64_t d = 0; d < dim; d++) { tmp_dist += (x[n_x * dim + d] - y[n_y * dim + d]) * (x[n_x * dim + d] - y[n_y * dim + d]); } } for (int64_t e1 = 0; e1 < k; e1++) { if (best_dist[e1] > tmp_dist) { for (int64_t e2 = k - 1; e2 > e1; e2--) { best_dist[e2] = best_dist[e2 - 1]; best_idx[e2] = best_idx[e2 - 1]; } best_dist[e1] = tmp_dist; best_idx[e1] = n_x; break; } } } for (int64_t e = 0; e < k; e++) { row[n_y * k + e] = n_y; col[n_y * k + e] = best_idx[e]; } } torch::Tensor knn_cuda(const torch::Tensor x, const torch::Tensor y, torch::optional ptr_x, torch::optional ptr_y, const int64_t k, const bool cosine) { CHECK_CUDA(x); CHECK_CONTIGUOUS(x); CHECK_INPUT(x.dim() == 2); CHECK_CUDA(y); CHECK_CONTIGUOUS(y); CHECK_INPUT(y.dim() == 2); CHECK_INPUT(x.size(1) == y.size(1)); AT_ASSERTM(k <= 100, "`k` needs to smaller than or equal to 100"); if (ptr_x.has_value()) { CHECK_CUDA(ptr_x.value()); CHECK_INPUT(ptr_x.value().dim() == 1); } else ptr_x = torch::arange(0, x.size(0) + 1, x.size(0), x.options().dtype(torch::kLong)); if (ptr_y.has_value()) { CHECK_CUDA(ptr_y.value()); CHECK_INPUT(ptr_y.value().dim() == 1); } else ptr_y = torch::arange(0, y.size(0) + 1, y.size(0), y.options().dtype(torch::kLong)); CHECK_INPUT(ptr_x.value().numel() == ptr_y.value().numel()); cudaSetDevice(x.get_device()); auto row = torch::empty({y.size(0) * k}, ptr_y.value().options()); auto col = torch::full(y.size(0) * k, -1, ptr_y.value().options()); dim3 BLOCKS((y.size(0) + THREADS - 1) / THREADS); auto stream = at::cuda::getCurrentCUDAStream(); auto scalar_type = x.scalar_type(); AT_DISPATCH_FLOATING_TYPES_AND(at::ScalarType::Half, scalar_type, "_", [&] { knn_kernel<<>>( x.data_ptr(), y.data_ptr(), ptr_x.value().data_ptr(), ptr_y.value().data_ptr(), row.data_ptr(), col.data_ptr(), k, x.size(0), y.size(0), x.size(1), ptr_x.value().numel() - 1, cosine); }); auto mask = col != -1; return torch::stack({row.masked_select(mask), col.masked_select(mask)}, 0); } 
pytorch_cluster-1.6.3/csrc/cuda/knn_cuda.h000066400000000000000000000004151451151623700205600ustar00rootroot00000000000000#pragma once #include "../extensions.h" torch::Tensor knn_cuda(torch::Tensor x, torch::Tensor y, torch::optional ptr_x, torch::optional ptr_y, int64_t k, bool cosine); pytorch_cluster-1.6.3/csrc/cuda/nearest_cuda.cu000066400000000000000000000047441451151623700216240ustar00rootroot00000000000000#include "nearest_cuda.h" #include #include "utils.cuh" #define THREADS 1024 template __global__ void nearest_kernel(const scalar_t *x, const scalar_t *y, const int64_t *ptr_x, const int64_t *ptr_y, int64_t *out, int64_t batch_size, int64_t dim) { const int64_t thread_idx = threadIdx.x; const int64_t n_x = blockIdx.x; int64_t batch_idx; for (int64_t b = 0; b < batch_size; b++) { if (n_x >= ptr_x[b] && n_x < ptr_x[b + 1]) { batch_idx = b; break; } } const int64_t y_start_idx = ptr_y[batch_idx]; const int64_t y_end_idx = ptr_y[batch_idx + 1]; __shared__ scalar_t best_dist[THREADS]; __shared__ int64_t best_dist_idx[THREADS]; scalar_t best = 1e38; int64_t best_idx = 0; for (int64_t n_y = y_start_idx + thread_idx; n_y < y_end_idx; n_y += THREADS) { scalar_t dist = 0; for (int64_t d = 0; d < dim; d++) { dist += (x[n_x * dim + d] - y[n_y * dim + d]) * (x[n_x * dim + d] - y[n_y * dim + d]); } if (dist < best) { best = dist; best_idx = n_y; } } best_dist[thread_idx] = best; best_dist_idx[thread_idx] = best_idx; for (int64_t u = 0; (1 << u) < THREADS; u++) { __syncthreads(); if (thread_idx < (THREADS >> (u + 1))) { int64_t idx_1 = (thread_idx * 2) << u; int64_t idx_2 = (thread_idx * 2 + 1) << u; if (best_dist[idx_1] > best_dist[idx_2]) { best_dist[idx_1] = best_dist[idx_2]; best_dist_idx[idx_1] = best_dist_idx[idx_2]; } } } __syncthreads(); if (thread_idx == 0) { out[n_x] = best_dist_idx[0]; } } torch::Tensor nearest_cuda(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x, torch::Tensor ptr_y) { CHECK_CUDA(x); CHECK_CUDA(y); CHECK_CUDA(ptr_x); CHECK_CUDA(ptr_y); 
cudaSetDevice(x.get_device()); x = x.view({x.size(0), -1}).contiguous(); y = y.view({y.size(0), -1}).contiguous(); auto out = torch::empty({x.size(0)}, ptr_x.options()); auto stream = at::cuda::getCurrentCUDAStream(); auto scalar_type = x.scalar_type(); AT_DISPATCH_FLOATING_TYPES_AND(at::ScalarType::Half, scalar_type, "_", [&] { nearest_kernel<<>>( x.data_ptr(), y.data_ptr(), ptr_x.data_ptr(), ptr_y.data_ptr(), out.data_ptr(), ptr_x.size(0) - 1, x.size(1)); }); return out; } pytorch_cluster-1.6.3/csrc/cuda/nearest_cuda.h000066400000000000000000000002551451151623700214350ustar00rootroot00000000000000#pragma once #include "../extensions.h" torch::Tensor nearest_cuda(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x, torch::Tensor ptr_y); pytorch_cluster-1.6.3/csrc/cuda/radius_cuda.cu000066400000000000000000000060351451151623700214450ustar00rootroot00000000000000#include "radius_cuda.h" #include #include "utils.cuh" #define THREADS 256 template __global__ void radius_kernel(const scalar_t *__restrict__ x, const scalar_t *__restrict__ y, const int64_t *__restrict__ ptr_x, const int64_t *__restrict__ ptr_y, int64_t *__restrict__ row, int64_t *__restrict__ col, const scalar_t r, const int64_t n, const int64_t m, const int64_t dim, const int64_t num_examples, const int64_t max_num_neighbors) { const int64_t n_y = blockIdx.x * blockDim.x + threadIdx.x; if (n_y >= m) return; int64_t count = 0; const int64_t example_idx = get_example_idx(n_y, ptr_y, num_examples); for (int64_t n_x = ptr_x[example_idx]; n_x < ptr_x[example_idx + 1]; n_x++) { scalar_t dist = 0; for (int64_t d = 0; d < dim; d++) { dist += (x[n_x * dim + d] - y[n_y * dim + d]) * (x[n_x * dim + d] - y[n_y * dim + d]); } if (dist < r) { row[n_y * max_num_neighbors + count] = n_y; col[n_y * max_num_neighbors + count] = n_x; count++; } if (count >= max_num_neighbors) break; } } torch::Tensor radius_cuda(const torch::Tensor x, const torch::Tensor y, torch::optional ptr_x, torch::optional ptr_y, const double r, 
const int64_t max_num_neighbors) { CHECK_CUDA(x); CHECK_CONTIGUOUS(x); CHECK_INPUT(x.dim() == 2); CHECK_CUDA(y); CHECK_CONTIGUOUS(y); CHECK_INPUT(y.dim() == 2); CHECK_INPUT(x.size(1) == y.size(1)); cudaSetDevice(x.get_device()); if (ptr_x.has_value()) { CHECK_CUDA(ptr_x.value()); CHECK_INPUT(ptr_x.value().dim() == 1); } else ptr_x = torch::arange(0, x.size(0) + 1, x.size(0), x.options().dtype(torch::kLong)); if (ptr_y.has_value()) { CHECK_CUDA(ptr_y.value()); CHECK_INPUT(ptr_y.value().dim() == 1); } else ptr_y = torch::arange(0, y.size(0) + 1, y.size(0), y.options().dtype(torch::kLong)); CHECK_INPUT(ptr_x.value().numel() == ptr_y.value().numel()); cudaSetDevice(x.get_device()); auto row = torch::full(y.size(0) * max_num_neighbors, -1, ptr_y.value().options()); auto col = torch::full(y.size(0) * max_num_neighbors, -1, ptr_y.value().options()); dim3 BLOCKS((y.size(0) + THREADS - 1) / THREADS); auto stream = at::cuda::getCurrentCUDAStream(); auto scalar_type = x.scalar_type(); AT_DISPATCH_FLOATING_TYPES_AND(at::ScalarType::Half, scalar_type, "_", [&] { radius_kernel<<>>( x.data_ptr(), y.data_ptr(), ptr_x.value().data_ptr(), ptr_y.value().data_ptr(), row.data_ptr(), col.data_ptr(), r * r, x.size(0), y.size(0), x.size(1), ptr_x.value().numel() - 1, max_num_neighbors); }); auto mask = row != -1; return torch::stack({row.masked_select(mask), col.masked_select(mask)}, 0); } pytorch_cluster-1.6.3/csrc/cuda/radius_cuda.h000066400000000000000000000004461451151623700212650ustar00rootroot00000000000000#pragma once #include "../extensions.h" torch::Tensor radius_cuda(torch::Tensor x, torch::Tensor y, torch::optional ptr_x, torch::optional ptr_y, double r, int64_t max_num_neighbors); pytorch_cluster-1.6.3/csrc/cuda/rw_cuda.cu000066400000000000000000000112211451151623700205770ustar00rootroot00000000000000#include "rw_cuda.h" #include #include #include #include "utils.cuh" #define THREADS 1024 #define BLOCKS(N) (N + THREADS - 1) / THREADS __global__ void 
uniform_sampling_kernel(const int64_t *rowptr, const int64_t *col, const int64_t *start, const float *rand, int64_t *n_out, int64_t *e_out, const int64_t walk_length, const int64_t numel) { const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x; if (thread_idx < numel) { int64_t n_cur = start[thread_idx], e_cur, row_start, row_end, rnd; n_out[thread_idx] = n_cur; for (int64_t l = 0; l < walk_length; l++) { row_start = rowptr[n_cur], row_end = rowptr[n_cur + 1]; if (row_end - row_start == 0) { e_cur = -1; } else { rnd = int64_t(rand[l * numel + thread_idx] * (row_end - row_start)); e_cur = row_start + rnd; n_cur = col[e_cur]; } n_out[(l + 1) * numel + thread_idx] = n_cur; e_out[l * numel + thread_idx] = e_cur; } } } __global__ void rejection_sampling_kernel(unsigned int seed, const int64_t *rowptr, const int64_t *col, const int64_t *start, int64_t *n_out, int64_t *e_out, const int64_t walk_length, const int64_t numel, const double p, const double q) { curandState_t state; curand_init(seed, 0, 0, &state); double max_prob = fmax(fmax(1. / p, 1.), 1. / q); double prob_0 = 1. / p / max_prob; double prob_1 = 1. / max_prob; double prob_2 = 1. 
/ q / max_prob; const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x; if (thread_idx < numel) { int64_t t = start[thread_idx], v, x, e_cur, row_start, row_end; n_out[thread_idx] = t; row_start = rowptr[t], row_end = rowptr[t + 1]; if (row_end - row_start == 0) { e_cur = -1; v = t; } else { e_cur = row_start + (curand(&state) % (row_end - row_start)); v = col[e_cur]; } n_out[numel + thread_idx] = v; e_out[thread_idx] = e_cur; for (int64_t l = 1; l < walk_length; l++) { row_start = rowptr[v], row_end = rowptr[v + 1]; if (row_end - row_start == 0) { e_cur = -1; x = v; } else if (row_end - row_start == 1) { e_cur = row_start; x = col[e_cur]; } else { while (true) { e_cur = row_start + (curand(&state) % (row_end - row_start)); x = col[e_cur]; double r = curand_uniform(&state); // (0, 1] if (x == t && r < prob_0) break; bool is_neighbor = false; row_start = rowptr[x], row_end = rowptr[x + 1]; for (int64_t i = row_start; i < row_end; i++) { if (col[i] == t) { is_neighbor = true; break; } } if (is_neighbor && r < prob_1) break; else if (r < prob_2) break; } } n_out[(l + 1) * numel + thread_idx] = x; e_out[l * numel + thread_idx] = e_cur; t = v; v = x; } } } std::tuple random_walk_cuda(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start, int64_t walk_length, double p, double q) { CHECK_CUDA(rowptr); CHECK_CUDA(col); CHECK_CUDA(start); cudaSetDevice(rowptr.get_device()); CHECK_INPUT(rowptr.dim() == 1); CHECK_INPUT(col.dim() == 1); CHECK_INPUT(start.dim() == 1); auto n_out = torch::empty({walk_length + 1, start.size(0)}, start.options()); auto e_out = torch::empty({walk_length, start.size(0)}, start.options()); auto stream = at::cuda::getCurrentCUDAStream(); if (p == 1. && q == 1.) 
{ auto rand = torch::rand({start.size(0), walk_length}, start.options().dtype(torch::kFloat)); uniform_sampling_kernel<<>>( rowptr.data_ptr(), col.data_ptr(), start.data_ptr(), rand.data_ptr(), n_out.data_ptr(), e_out.data_ptr(), walk_length, start.numel()); } else { rejection_sampling_kernel<<>>( time(NULL), rowptr.data_ptr(), col.data_ptr(), start.data_ptr(), n_out.data_ptr(), e_out.data_ptr(), walk_length, start.numel(), p, q); } return std::make_tuple(n_out.t().contiguous(), e_out.t().contiguous()); } pytorch_cluster-1.6.3/csrc/cuda/rw_cuda.h000066400000000000000000000003351451151623700204230ustar00rootroot00000000000000#pragma once #include "../extensions.h" std::tuple random_walk_cuda(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start, int64_t walk_length, double p, double q); pytorch_cluster-1.6.3/csrc/cuda/utils.cuh000066400000000000000000000013161451151623700204670ustar00rootroot00000000000000#pragma once #include "../extensions.h" #define CHECK_CUDA(x) \ AT_ASSERTM(x.device().is_cuda(), #x " must be CUDA tensor") #define CHECK_INPUT(x) AT_ASSERTM(x, "Input mismatch") #define CHECK_CONTIGUOUS(x) \ AT_ASSERTM(x.is_contiguous(), #x " must be contiguous") __forceinline__ __device__ int64_t get_example_idx(int64_t idx, const int64_t *ptr, const int64_t num_examples) { for (int64_t i = 0; i < num_examples; i++) { if (ptr[i + 1] > idx) return i; } return num_examples - 1; } pytorch_cluster-1.6.3/csrc/extensions.h000066400000000000000000000000551451151623700202610ustar00rootroot00000000000000#include "macros.h" #include pytorch_cluster-1.6.3/csrc/fps.cpp000066400000000000000000000014171451151623700172100ustar00rootroot00000000000000#ifdef WITH_PYTHON #include #endif #include #include "cpu/fps_cpu.h" #ifdef WITH_CUDA #include "cuda/fps_cuda.h" #endif #ifdef _WIN32 #ifdef WITH_PYTHON #ifdef WITH_CUDA PyMODINIT_FUNC PyInit__fps_cuda(void) { return NULL; } #else PyMODINIT_FUNC PyInit__fps_cpu(void) { return NULL; } #endif #endif #endif CLUSTER_API 
torch::Tensor fps(torch::Tensor src, torch::Tensor ptr, torch::Tensor ratio, bool random_start) { if (src.device().is_cuda()) { #ifdef WITH_CUDA return fps_cuda(src, ptr, ratio, random_start); #else AT_ERROR("Not compiled with CUDA support"); #endif } else { return fps_cpu(src, ptr, ratio, random_start); } } static auto registry = torch::RegisterOperators().op("torch_cluster::fps", &fps); pytorch_cluster-1.6.3/csrc/graclus.cpp000066400000000000000000000015031451151623700200540ustar00rootroot00000000000000#ifdef WITH_PYTHON #include #endif #include #include "cpu/graclus_cpu.h" #ifdef WITH_CUDA #include "cuda/graclus_cuda.h" #endif #ifdef _WIN32 #ifdef WITH_PYTHON #ifdef WITH_CUDA PyMODINIT_FUNC PyInit__graclus_cuda(void) { return NULL; } #else PyMODINIT_FUNC PyInit__graclus_cpu(void) { return NULL; } #endif #endif #endif CLUSTER_API torch::Tensor graclus(torch::Tensor rowptr, torch::Tensor col, torch::optional optional_weight) { if (rowptr.device().is_cuda()) { #ifdef WITH_CUDA return graclus_cuda(rowptr, col, optional_weight); #else AT_ERROR("Not compiled with CUDA support"); #endif } else { return graclus_cpu(rowptr, col, optional_weight); } } static auto registry = torch::RegisterOperators().op("torch_cluster::graclus", &graclus); pytorch_cluster-1.6.3/csrc/grid.cpp000066400000000000000000000015651451151623700173510ustar00rootroot00000000000000#ifdef WITH_PYTHON #include #endif #include #include "cpu/grid_cpu.h" #ifdef WITH_CUDA #include "cuda/grid_cuda.h" #endif #ifdef _WIN32 #ifdef WITH_PYTHON #ifdef WITH_CUDA PyMODINIT_FUNC PyInit__grid_cuda(void) { return NULL; } #else PyMODINIT_FUNC PyInit__grid_cpu(void) { return NULL; } #endif #endif #endif CLUSTER_API torch::Tensor grid(torch::Tensor pos, torch::Tensor size, torch::optional optional_start, torch::optional optional_end) { if (pos.device().is_cuda()) { #ifdef WITH_CUDA return grid_cuda(pos, size, optional_start, optional_end); #else AT_ERROR("Not compiled with CUDA support"); #endif } else { return 
grid_cpu(pos, size, optional_start, optional_end); } } static auto registry = torch::RegisterOperators().op("torch_cluster::grid", &grid); pytorch_cluster-1.6.3/csrc/knn.cpp000066400000000000000000000017151451151623700172070ustar00rootroot00000000000000#ifdef WITH_PYTHON #include #endif #include #include "cpu/knn_cpu.h" #ifdef WITH_CUDA #include "cuda/knn_cuda.h" #endif #ifdef _WIN32 #ifdef WITH_PYTHON #ifdef WITH_CUDA PyMODINIT_FUNC PyInit__knn_cuda(void) { return NULL; } #else PyMODINIT_FUNC PyInit__knn_cpu(void) { return NULL; } #endif #endif #endif CLUSTER_API torch::Tensor knn(torch::Tensor x, torch::Tensor y, torch::optional ptr_x, torch::optional ptr_y, int64_t k, bool cosine, int64_t num_workers) { if (x.device().is_cuda()) { #ifdef WITH_CUDA return knn_cuda(x, y, ptr_x, ptr_y, k, cosine); #else AT_ERROR("Not compiled with CUDA support"); #endif } else { if (cosine) AT_ERROR("`cosine` argument not supported on CPU"); return knn_cpu(x, y, ptr_x, ptr_y, k, num_workers); } } static auto registry = torch::RegisterOperators().op("torch_cluster::knn", &knn); pytorch_cluster-1.6.3/csrc/macros.h000066400000000000000000000006751451151623700173560ustar00rootroot00000000000000#pragma once #ifdef _WIN32 #if defined(torchcluster_EXPORTS) #define CLUSTER_API __declspec(dllexport) #else #define CLUSTER_API __declspec(dllimport) #endif #else #define CLUSTER_API #endif #if (defined __cpp_inline_variables) || __cplusplus >= 201703L #define CLUSTER_INLINE_VARIABLE inline #else #ifdef _MSC_VER #define CLUSTER_INLINE_VARIABLE __declspec(selectany) #else #define CLUSTER_INLINE_VARIABLE __attribute__((weak)) #endif #endif pytorch_cluster-1.6.3/csrc/nearest.cpp000066400000000000000000000014261451151623700200610ustar00rootroot00000000000000#ifdef WITH_PYTHON #include #endif #include #include "extensions.h" #ifdef WITH_CUDA #include "cuda/nearest_cuda.h" #endif #ifdef _WIN32 #ifdef WITH_PYTHON #ifdef WITH_CUDA PyMODINIT_FUNC PyInit__nearest_cuda(void) { return NULL; } #else 
PyMODINIT_FUNC PyInit__nearest_cpu(void) { return NULL; } #endif #endif #endif CLUSTER_API torch::Tensor nearest(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x, torch::Tensor ptr_y) { if (x.device().is_cuda()) { #ifdef WITH_CUDA return nearest_cuda(x, y, ptr_x, ptr_y); #else AT_ERROR("Not compiled with CUDA support"); #endif } else { AT_ERROR("No CPU version supported"); } } static auto registry = torch::RegisterOperators().op("torch_cluster::nearest", &nearest); pytorch_cluster-1.6.3/csrc/radius.cpp000066400000000000000000000017221451151623700177060ustar00rootroot00000000000000#ifdef WITH_PYTHON #include #endif #include #include "cpu/radius_cpu.h" #ifdef WITH_CUDA #include "cuda/radius_cuda.h" #endif #ifdef _WIN32 #ifdef WITH_PYTHON #ifdef WITH_CUDA PyMODINIT_FUNC PyInit__radius_cuda(void) { return NULL; } #else PyMODINIT_FUNC PyInit__radius_cpu(void) { return NULL; } #endif #endif #endif CLUSTER_API torch::Tensor radius(torch::Tensor x, torch::Tensor y, torch::optional ptr_x, torch::optional ptr_y, double r, int64_t max_num_neighbors, int64_t num_workers) { if (x.device().is_cuda()) { #ifdef WITH_CUDA return radius_cuda(x, y, ptr_x, ptr_y, r, max_num_neighbors); #else AT_ERROR("Not compiled with CUDA support"); #endif } else { return radius_cpu(x, y, ptr_x, ptr_y, r, max_num_neighbors, num_workers); } } static auto registry = torch::RegisterOperators().op("torch_cluster::radius", &radius); pytorch_cluster-1.6.3/csrc/rw.cpp000066400000000000000000000015641451151623700170530ustar00rootroot00000000000000#ifdef WITH_PYTHON #include #endif #include #include "cpu/rw_cpu.h" #ifdef WITH_CUDA #include "cuda/rw_cuda.h" #endif #ifdef _WIN32 #ifdef WITH_PYTHON #ifdef WITH_CUDA PyMODINIT_FUNC PyInit__rw_cuda(void) { return NULL; } #else PyMODINIT_FUNC PyInit__rw_cpu(void) { return NULL; } #endif #endif #endif CLUSTER_API std::tuple random_walk(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start, int64_t walk_length, double p, double q) { if 
(rowptr.device().is_cuda()) { #ifdef WITH_CUDA return random_walk_cuda(rowptr, col, start, walk_length, p, q); #else AT_ERROR("Not compiled with CUDA support"); #endif } else { return random_walk_cpu(rowptr, col, start, walk_length, p, q); } } static auto registry = torch::RegisterOperators().op("torch_cluster::random_walk", &random_walk); pytorch_cluster-1.6.3/csrc/sampler.cpp000066400000000000000000000014351451151623700200630ustar00rootroot00000000000000#ifdef WITH_PYTHON #include #endif #include #include "cpu/sampler_cpu.h" #ifdef _WIN32 #ifdef WITH_PYTHON #ifdef WITH_CUDA PyMODINIT_FUNC PyInit__sampler_cuda(void) { return NULL; } #else PyMODINIT_FUNC PyInit__sampler_cpu(void) { return NULL; } #endif #endif #endif CLUSTER_API torch::Tensor neighbor_sampler(torch::Tensor start, torch::Tensor rowptr, int64_t count, double factor) { if (rowptr.device().is_cuda()) { #ifdef WITH_CUDA AT_ERROR("No CUDA version supported"); #else AT_ERROR("Not compiled with CUDA support"); #endif } else { return neighbor_sampler_cpu(start, rowptr, count, factor); } } static auto registry = torch::RegisterOperators().op( "torch_cluster::neighbor_sampler", &neighbor_sampler); pytorch_cluster-1.6.3/csrc/version.cpp000066400000000000000000000013551451151623700201060ustar00rootroot00000000000000#ifdef WITH_PYTHON #include #endif #include "cluster.h" #include "macros.h" #include #ifdef WITH_CUDA #ifdef USE_ROCM #include #else #include #endif #endif #ifdef _WIN32 #ifdef WITH_PYTHON #ifdef WITH_CUDA PyMODINIT_FUNC PyInit__version_cuda(void) { return NULL; } #else PyMODINIT_FUNC PyInit__version_cpu(void) { return NULL; } #endif #endif #endif namespace cluster { CLUSTER_API int64_t cuda_version() noexcept { #ifdef WITH_CUDA #ifdef USE_ROCM return HIP_VERSION; #else return CUDA_VERSION; #endif #else return -1; #endif } } // namespace cluster static auto registry = torch::RegisterOperators().op( "torch_cluster::cuda_version", [] { return cluster::cuda_version(); }); 
pytorch_cluster-1.6.3/setup.cfg000066400000000000000000000007761451151623700166120ustar00rootroot00000000000000[metadata] long_description=file: README.md long_description_content_type=text/markdown classifiers = Development Status :: 5 - Production/Stable License :: OSI Approved :: MIT License Programming Language :: Python Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 Programming Language :: Python :: 3 :: Only [aliases] test = pytest [tool:pytest] addopts = --capture=no pytorch_cluster-1.6.3/setup.py000066400000000000000000000111671451151623700164770ustar00rootroot00000000000000import glob import os import os.path as osp import platform import sys from itertools import product import torch from setuptools import find_packages, setup from torch.__config__ import parallel_info from torch.utils.cpp_extension import (CUDA_HOME, BuildExtension, CppExtension, CUDAExtension) __version__ = '1.6.3' URL = 'https://github.com/rusty1s/pytorch_cluster' WITH_CUDA = False if torch.cuda.is_available(): WITH_CUDA = CUDA_HOME is not None or torch.version.hip suffices = ['cpu', 'cuda'] if WITH_CUDA else ['cpu'] if os.getenv('FORCE_CUDA', '0') == '1': suffices = ['cuda', 'cpu'] if os.getenv('FORCE_ONLY_CUDA', '0') == '1': suffices = ['cuda'] if os.getenv('FORCE_ONLY_CPU', '0') == '1': suffices = ['cpu'] BUILD_DOCS = os.getenv('BUILD_DOCS', '0') == '1' def get_extensions(): extensions = [] extensions_dir = osp.join('csrc') main_files = glob.glob(osp.join(extensions_dir, '*.cpp')) # remove generated 'hip' files, in case of rebuilds main_files = [path for path in main_files if 'hip' not in path] for main, suffix in product(main_files, suffices): define_macros = [('WITH_PYTHON', None)] undef_macros = [] if sys.platform == 'win32': define_macros += [('torchcluster_EXPORTS', None)] extra_compile_args = {'cxx': ['-O2']} if not os.name == 'nt': # Not on Windows: 
extra_compile_args['cxx'] += ['-Wno-sign-compare'] extra_link_args = ['-s'] info = parallel_info() if ('backend: OpenMP' in info and 'OpenMP not found' not in info and sys.platform != 'darwin'): extra_compile_args['cxx'] += ['-DAT_PARALLEL_OPENMP'] if sys.platform == 'win32': extra_compile_args['cxx'] += ['/openmp'] else: extra_compile_args['cxx'] += ['-fopenmp'] else: print('Compiling without OpenMP...') # Compile for mac arm64 if (sys.platform == 'darwin' and platform.machine() == 'arm64'): extra_compile_args['cxx'] += ['-arch', 'arm64'] extra_link_args += ['-arch', 'arm64'] if suffix == 'cuda': define_macros += [('WITH_CUDA', None)] nvcc_flags = os.getenv('NVCC_FLAGS', '') nvcc_flags = [] if nvcc_flags == '' else nvcc_flags.split(' ') nvcc_flags += ['-O2'] extra_compile_args['nvcc'] = nvcc_flags if torch.version.hip: # USE_ROCM was added to later versions of PyTorch # Define here to support older PyTorch versions as well: define_macros += [('USE_ROCM', None)] undef_macros += ['__HIP_NO_HALF_CONVERSIONS__'] else: nvcc_flags += ['--expt-relaxed-constexpr'] name = main.split(os.sep)[-1][:-4] sources = [main] path = osp.join(extensions_dir, 'cpu', f'{name}_cpu.cpp') if osp.exists(path): sources += [path] path = osp.join(extensions_dir, 'cuda', f'{name}_cuda.cu') if suffix == 'cuda' and osp.exists(path): sources += [path] Extension = CppExtension if suffix == 'cpu' else CUDAExtension extension = Extension( f'torch_cluster._{name}_{suffix}', sources, include_dirs=[extensions_dir], define_macros=define_macros, undef_macros=undef_macros, extra_compile_args=extra_compile_args, extra_link_args=extra_link_args, ) extensions += [extension] return extensions install_requires = [ 'scipy', ] test_requires = [ 'pytest', 'pytest-cov', ] # work-around hipify abs paths include_package_data = True if torch.cuda.is_available() and torch.version.hip: include_package_data = False setup( name='torch_cluster', version=__version__, description=('PyTorch Extension Library of Optimized 
Graph Cluster ' 'Algorithms'), author='Matthias Fey', author_email='matthias.fey@tu-dortmund.de', url=URL, download_url=f'{URL}/archive/{__version__}.tar.gz', keywords=[ 'pytorch', 'geometric-deep-learning', 'graph-neural-networks', 'cluster-algorithms', ], python_requires='>=3.8', install_requires=install_requires, extras_require={ 'test': test_requires, }, ext_modules=get_extensions() if not BUILD_DOCS else [], cmdclass={ 'build_ext': BuildExtension.with_options(no_python_abi_suffix=True, use_ninja=False) }, packages=find_packages(), include_package_data=include_package_data, ) pytorch_cluster-1.6.3/test/000077500000000000000000000000001451151623700157365ustar00rootroot00000000000000pytorch_cluster-1.6.3/test/test_fps.py000066400000000000000000000044731451151623700201470ustar00rootroot00000000000000from itertools import product import pytest import torch from torch import Tensor from torch_cluster import fps from torch_cluster.testing import devices, grad_dtypes, tensor @torch.jit.script def fps2(x: Tensor, ratio: Tensor) -> Tensor: return fps(x, None, ratio, False) @pytest.mark.parametrize('dtype,device', product(grad_dtypes, devices)) def test_fps(dtype, device): x = tensor([ [-1, -1], [-1, +1], [+1, +1], [+1, -1], [-2, -2], [-2, +2], [+2, +2], [+2, -2], ], dtype, device) batch = tensor([0, 0, 0, 0, 1, 1, 1, 1], torch.long, device) ptr_list = [0, 4, 8] ptr = torch.tensor(ptr_list, device=device) out = fps(x, batch, random_start=False) assert out.tolist() == [0, 2, 4, 6] out = fps(x, batch, ratio=0.5, random_start=False) assert out.tolist() == [0, 2, 4, 6] ratio = torch.tensor(0.5, device=device) out = fps(x, batch, ratio=ratio, random_start=False) assert out.tolist() == [0, 2, 4, 6] out = fps(x, ptr=ptr_list, ratio=0.5, random_start=False) assert out.tolist() == [0, 2, 4, 6] out = fps(x, ptr=ptr, ratio=0.5, random_start=False) assert out.tolist() == [0, 2, 4, 6] ratio = torch.tensor([0.5, 0.5], device=device) out = fps(x, batch, ratio=ratio, random_start=False) 
assert out.tolist() == [0, 2, 4, 6] out = fps(x, random_start=False) assert out.sort()[0].tolist() == [0, 5, 6, 7] out = fps(x, ratio=0.5, random_start=False) assert out.sort()[0].tolist() == [0, 5, 6, 7] out = fps(x, ratio=torch.tensor(0.5, device=device), random_start=False) assert out.sort()[0].tolist() == [0, 5, 6, 7] out = fps(x, ratio=torch.tensor([0.5], device=device), random_start=False) assert out.sort()[0].tolist() == [0, 5, 6, 7] out = fps2(x, torch.tensor([0.5], device=device)) assert out.sort()[0].tolist() == [0, 5, 6, 7] @pytest.mark.parametrize('device', devices) def test_random_fps(device): N = 1024 for _ in range(5): pos = torch.randn((2 * N, 3), device=device) batch_1 = torch.zeros(N, dtype=torch.long, device=device) batch_2 = torch.ones(N, dtype=torch.long, device=device) batch = torch.cat([batch_1, batch_2]) idx = fps(pos, batch, ratio=0.5) assert idx.min() >= 0 and idx.max() < 2 * N pytorch_cluster-1.6.3/test/test_graclus.py000066400000000000000000000034441451151623700210140ustar00rootroot00000000000000from itertools import product import pytest import torch from torch_cluster import graclus_cluster from torch_cluster.testing import devices, dtypes, tensor tests = [{ 'row': [0, 0, 1, 1, 1, 2, 2, 2, 3, 3], 'col': [1, 2, 0, 2, 3, 0, 1, 3, 1, 2], }, { 'row': [0, 0, 1, 1, 1, 2, 2, 2, 3, 3], 'col': [1, 2, 0, 2, 3, 0, 1, 3, 1, 2], 'weight': [1, 2, 1, 3, 2, 2, 3, 1, 2, 1], }] def assert_correct(row, col, cluster): row, col, cluster = row.to('cpu'), col.to('cpu'), cluster.to('cpu') n = cluster.size(0) # Every node was assigned a cluster. assert cluster.min() >= 0 # There are no more than two nodes in each cluster. _, index = torch.unique(cluster, return_inverse=True) count = torch.zeros_like(cluster) count.scatter_add_(0, index, torch.ones_like(cluster)) assert (count > 2).max() == 0 # Cluster value is minimal. assert (cluster <= torch.arange(n, dtype=cluster.dtype)).sum() == n # Corresponding clusters must be adjacent. 
for i in range(n): x = cluster[col[row == i]] == cluster[i] # Neighbors with same cluster y = cluster == cluster[i] # Nodes with same cluster. y[i] = 0 # Do not look at cluster of `i`. assert x.sum() == y.sum() @pytest.mark.parametrize('test,dtype,device', product(tests, dtypes, devices)) def test_graclus_cluster(test, dtype, device): if dtype == torch.bfloat16 and device == torch.device('cuda:0'): return row = tensor(test['row'], torch.long, device) col = tensor(test['col'], torch.long, device) weight = tensor(test.get('weight'), dtype, device) cluster = graclus_cluster(row, col, weight) assert_correct(row, col, cluster) jit = torch.jit.script(graclus_cluster) cluster = jit(row, col, weight) assert_correct(row, col, cluster) pytorch_cluster-1.6.3/test/test_grid.py000066400000000000000000000022061451151623700202740ustar00rootroot00000000000000from itertools import product import pytest import torch from torch_cluster import grid_cluster from torch_cluster.testing import devices, dtypes, tensor tests = [{ 'pos': [2, 6], 'size': [5], 'cluster': [0, 0], }, { 'pos': [2, 6], 'size': [5], 'start': [0], 'cluster': [0, 1], }, { 'pos': [[0, 0], [11, 9], [2, 8], [2, 2], [8, 3]], 'size': [5, 5], 'cluster': [0, 5, 3, 0, 1], }, { 'pos': [[0, 0], [11, 9], [2, 8], [2, 2], [8, 3]], 'size': [5, 5], 'end': [19, 19], 'cluster': [0, 6, 4, 0, 1], }] @pytest.mark.parametrize('test,dtype,device', product(tests, dtypes, devices)) def test_grid_cluster(test, dtype, device): if dtype == torch.bfloat16 and device == torch.device('cuda:0'): return pos = tensor(test['pos'], dtype, device) size = tensor(test['size'], dtype, device) start = tensor(test.get('start'), dtype, device) end = tensor(test.get('end'), dtype, device) cluster = grid_cluster(pos, size, start, end) assert cluster.tolist() == test['cluster'] jit = torch.jit.script(grid_cluster) assert torch.equal(jit(pos, size, start, end), cluster) 
pytorch_cluster-1.6.3/test/test_knn.py000066400000000000000000000054101451151623700201350ustar00rootroot00000000000000from itertools import product import pytest import scipy.spatial import torch from torch_cluster import knn, knn_graph from torch_cluster.testing import devices, grad_dtypes, tensor def to_set(edge_index): return set([(i, j) for i, j in edge_index.t().tolist()]) @pytest.mark.parametrize('dtype,device', product(grad_dtypes, devices)) def test_knn(dtype, device): x = tensor([ [-1, -1], [-1, +1], [+1, +1], [+1, -1], [-1, -1], [-1, +1], [+1, +1], [+1, -1], ], dtype, device) y = tensor([ [1, 0], [-1, 0], ], dtype, device) batch_x = tensor([0, 0, 0, 0, 1, 1, 1, 1], torch.long, device) batch_y = tensor([0, 1], torch.long, device) edge_index = knn(x, y, 2) assert to_set(edge_index) == set([(0, 2), (0, 3), (1, 0), (1, 1)]) jit = torch.jit.script(knn) edge_index = jit(x, y, 2) assert to_set(edge_index) == set([(0, 2), (0, 3), (1, 0), (1, 1)]) edge_index = knn(x, y, 2, batch_x, batch_y) assert to_set(edge_index) == set([(0, 2), (0, 3), (1, 4), (1, 5)]) if x.is_cuda: edge_index = knn(x, y, 2, batch_x, batch_y, cosine=True) assert to_set(edge_index) == set([(0, 2), (0, 3), (1, 4), (1, 5)]) # Skipping a batch batch_x = tensor([0, 0, 0, 0, 2, 2, 2, 2], torch.long, device) batch_y = tensor([0, 2], torch.long, device) edge_index = knn(x, y, 2, batch_x, batch_y) assert to_set(edge_index) == set([(0, 2), (0, 3), (1, 4), (1, 5)]) @pytest.mark.parametrize('dtype,device', product(grad_dtypes, devices)) def test_knn_graph(dtype, device): x = tensor([ [-1, -1], [-1, +1], [+1, +1], [+1, -1], ], dtype, device) edge_index = knn_graph(x, k=2, flow='target_to_source') assert to_set(edge_index) == set([(0, 1), (0, 3), (1, 0), (1, 2), (2, 1), (2, 3), (3, 0), (3, 2)]) edge_index = knn_graph(x, k=2, flow='source_to_target') assert to_set(edge_index) == set([(1, 0), (3, 0), (0, 1), (2, 1), (1, 2), (3, 2), (0, 3), (2, 3)]) jit = torch.jit.script(knn_graph) edge_index = jit(x, k=2, 
flow='source_to_target') assert to_set(edge_index) == set([(1, 0), (3, 0), (0, 1), (2, 1), (1, 2), (3, 2), (0, 3), (2, 3)]) @pytest.mark.parametrize('dtype,device', product([torch.float], devices)) def test_knn_graph_large(dtype, device): x = torch.randn(1000, 3, dtype=dtype, device=device) edge_index = knn_graph(x, k=5, flow='target_to_source', loop=True) tree = scipy.spatial.cKDTree(x.cpu().numpy()) _, col = tree.query(x.cpu(), k=5) truth = set([(i, j) for i, ns in enumerate(col) for j in ns]) assert to_set(edge_index.cpu()) == truth pytorch_cluster-1.6.3/test/test_nearest.py000066400000000000000000000037361451151623700210210ustar00rootroot00000000000000from itertools import product import pytest import torch from torch_cluster import nearest from torch_cluster.testing import devices, grad_dtypes, tensor @pytest.mark.parametrize('dtype,device', product(grad_dtypes, devices)) def test_nearest(dtype, device): x = tensor([ [-1, -1], [-1, +1], [+1, +1], [+1, -1], [-2, -2], [-2, +2], [+2, +2], [+2, -2], ], dtype, device) y = tensor([ [-1, 0], [+1, 0], [-2, 0], [+2, 0], ], dtype, device) batch_x = tensor([0, 0, 0, 0, 1, 1, 1, 1], torch.long, device) batch_y = tensor([0, 0, 1, 1], torch.long, device) out = nearest(x, y, batch_x, batch_y) assert out.tolist() == [0, 0, 1, 1, 2, 2, 3, 3] out = nearest(x, y) assert out.tolist() == [0, 0, 1, 1, 2, 2, 3, 3] # Invalid input: instance 1 only in batch_x batch_x = tensor([0, 0, 0, 0, 1, 1, 1, 1], torch.long, device) batch_y = tensor([0, 0, 0, 0], torch.long, device) with pytest.raises(ValueError): nearest(x, y, batch_x, batch_y) # Invalid input: instance 1 only in batch_x (implicitly as batch_y=None) with pytest.raises(ValueError): nearest(x, y, batch_x, batch_y=None) # Invalid input: instance 2 only in batch_x # (i.e.instance in the middle missing) batch_x = tensor([0, 0, 1, 1, 2, 2, 3, 3], torch.long, device) batch_y = tensor([0, 1, 3, 3], torch.long, device) with pytest.raises(ValueError): nearest(x, y, batch_x, batch_y) # 
Invalid input: batch_x unsorted batch_x = tensor([0, 0, 1, 0, 0, 0, 0], torch.long, device) batch_y = tensor([0, 0, 1, 1], torch.long, device) with pytest.raises(ValueError): nearest(x, y, batch_x, batch_y) # Invalid input: batch_y unsorted batch_x = tensor([0, 0, 0, 0, 1, 1, 1, 1], torch.long, device) batch_y = tensor([0, 0, 1, 0], torch.long, device) with pytest.raises(ValueError): nearest(x, y, batch_x, batch_y) pytorch_cluster-1.6.3/test/test_radius.py000066400000000000000000000061751451151623700206470ustar00rootroot00000000000000from itertools import product import pytest import scipy.spatial import torch from torch_cluster import radius, radius_graph from torch_cluster.testing import devices, grad_dtypes, tensor def to_set(edge_index): return set([(i, j) for i, j in edge_index.t().tolist()]) @pytest.mark.parametrize('dtype,device', product(grad_dtypes, devices)) def test_radius(dtype, device): x = tensor([ [-1, -1], [-1, +1], [+1, +1], [+1, -1], [-1, -1], [-1, +1], [+1, +1], [+1, -1], ], dtype, device) y = tensor([ [0, 0], [0, 1], ], dtype, device) batch_x = tensor([0, 0, 0, 0, 1, 1, 1, 1], torch.long, device) batch_y = tensor([0, 1], torch.long, device) edge_index = radius(x, y, 2, max_num_neighbors=4) assert to_set(edge_index) == set([(0, 0), (0, 1), (0, 2), (0, 3), (1, 1), (1, 2), (1, 5), (1, 6)]) jit = torch.jit.script(radius) edge_index = jit(x, y, 2, max_num_neighbors=4) assert to_set(edge_index) == set([(0, 0), (0, 1), (0, 2), (0, 3), (1, 1), (1, 2), (1, 5), (1, 6)]) edge_index = radius(x, y, 2, batch_x, batch_y, max_num_neighbors=4) assert to_set(edge_index) == set([(0, 0), (0, 1), (0, 2), (0, 3), (1, 5), (1, 6)]) # Skipping a batch batch_x = tensor([0, 0, 0, 0, 2, 2, 2, 2], torch.long, device) batch_y = tensor([0, 2], torch.long, device) edge_index = radius(x, y, 2, batch_x, batch_y, max_num_neighbors=4) assert to_set(edge_index) == set([(0, 0), (0, 1), (0, 2), (0, 3), (1, 5), (1, 6)]) @pytest.mark.parametrize('dtype,device', product(grad_dtypes, 
@pytest.mark.parametrize('dtype,device', product([torch.float], devices))
def test_radius_graph(dtype, device):
    # Four corners of a square: each corner lies within radius 2.5 of its two
    # edge-adjacent corners, but not of the diagonally opposite one.
    x = tensor([
        [-1, -1],
        [-1, +1],
        [+1, +1],
        [+1, -1],
    ], dtype, device)

    # The graph is symmetric, so both flow directions yield the same edge set.
    expected = {(0, 1), (0, 3), (1, 0), (1, 2),
                (2, 1), (2, 3), (3, 0), (3, 2)}

    edge_index = radius_graph(x, r=2.5, flow='target_to_source')
    assert to_set(edge_index) == expected

    edge_index = radius_graph(x, r=2.5, flow='source_to_target')
    assert to_set(edge_index) == expected

    # The function must also survive TorchScript compilation.
    jit = torch.jit.script(radius_graph)
    edge_index = jit(x, r=2.5, flow='source_to_target')
    assert to_set(edge_index) == expected


@pytest.mark.parametrize('dtype,device', product([torch.float], devices))
def test_radius_graph_large(dtype, device):
    x = torch.randn(1000, 3, dtype=dtype, device=device)

    edge_index = radius_graph(x, r=0.5, flow='target_to_source', loop=True,
                              max_num_neighbors=2000)

    # Brute-force ground truth via a KD-tree on the CPU.
    tree = scipy.spatial.cKDTree(x.cpu().numpy())
    col = tree.query_ball_point(x.cpu(), r=0.5)
    truth = {(i, j) for i, ns in enumerate(col) for j in ns}

    assert to_set(edge_index.cpu()) == truth


@pytest.mark.parametrize('device', devices)
def test_rw_large(device):
    row = tensor([0, 1, 1, 1, 2, 2, 3, 3, 4, 4], torch.long, device)
    col = tensor([1, 0, 2, 3, 1, 4, 1, 4, 2, 3], torch.long, device)
    start = tensor([0, 1, 2, 3, 4], torch.long, device)
    walk_length = 10

    out = random_walk(row, col, start, walk_length)

    # Walks begin at their start node ...
    assert out[:, 0].tolist() == start.tolist()

    # ... and every transition must follow an existing edge.
    for n in range(start.size(0)):
        cur = start[n].item()
        for i in range(1, walk_length):
            assert out[n, i].item() in col[row == cur].tolist()
            cur = out[n, i].item()


@pytest.mark.parametrize('device', devices)
def test_rw_small(device):
    row = tensor([0, 1], torch.long, device)
    col = tensor([1, 0], torch.long, device)
    start = tensor([0, 1, 2], torch.long, device)
    walk_length = 4

    # Node 2 is isolated, so its walk stays in place.
    out = random_walk(row, col, start, walk_length, num_nodes=3)
    assert out.tolist() == [[0, 1, 0, 1, 0], [1, 0, 1, 0, 1], [2, 2, 2, 2, 2]]

    # TorchScript version must agree with eager mode.
    jit = torch.jit.script(random_walk)
    assert torch.equal(jit(row, col, start, walk_length, num_nodes=3), out)


@pytest.mark.parametrize('device', devices)
def test_rw_large_with_edge_indices(device):
    row = tensor([0, 1, 1, 1, 2, 2, 3, 3, 4, 4], torch.long, device)
    col = tensor([1, 0, 2, 3, 1, 4, 1, 4, 2, 3], torch.long, device)
    start = tensor([0, 1, 2, 3, 4], torch.long, device)
    walk_length = 10

    node_seq, edge_seq = random_walk(
        row,
        col,
        start,
        walk_length,
        return_edge_indices=True,
    )

    assert node_seq[:, 0].tolist() == start.tolist()
    for n in range(start.size(0)):
        cur = start[n].item()
        for i in range(1, walk_length):
            assert node_seq[n, i].item() in col[row == cur].tolist()
            cur = node_seq[n, i].item()

    # Every step on this connected graph traverses a real edge.
    assert (edge_seq != -1).all()


@pytest.mark.parametrize('device', devices)
def test_rw_small_with_edge_indices(device):
    row = tensor([0, 1], torch.long, device)
    col = tensor([1, 0], torch.long, device)
    start = tensor([0, 1, 2], torch.long, device)
    walk_length = 4

    node_seq, edge_seq = random_walk(
        row,
        col,
        start,
        walk_length,
        num_nodes=3,
        return_edge_indices=True,
    )

    assert node_seq.tolist() == [
        [0, 1, 0, 1, 0],
        [1, 0, 1, 0, 1],
        [2, 2, 2, 2, 2],
    ]
    # The isolated node 2 produces no traversed edges (-1 sentinel).
    assert edge_seq.tolist() == [
        [0, 1, 0, 1],
        [1, 0, 1, 0],
        [-1, -1, -1, -1],
    ]


def test_neighbor_sampler():
    # Fixed seed: the expected permutations below depend on it.
    torch.manual_seed(1234)
    start = torch.tensor([0, 1])
    cumdeg = torch.tensor([0, 3, 7])

    # size <= 1 is interpreted as a fraction of each node's degree.
    e_id = neighbor_sampler(start, cumdeg, size=1.0)
    assert e_id.tolist() == [0, 2, 1, 5, 6, 3, 4]

    # size > 1 is interpreted as an absolute neighbor count.
    e_id = neighbor_sampler(start, cumdeg, size=3)
    assert e_id.tolist() == [1, 0, 2, 4, 5, 6]
import importlib
import os.path as osp

import torch

__version__ = '1.6.3'

# Load the compiled extension for every operator bundle, preferring the
# CUDA build over the CPU-only build whenever both are present.
for library in [
        '_version', '_grid', '_graclus', '_fps', '_rw', '_sampler',
        '_nearest', '_knn', '_radius'
]:
    finder = importlib.machinery.PathFinder()
    cuda_spec = finder.find_spec(f'{library}_cuda', [osp.dirname(__file__)])
    cpu_spec = finder.find_spec(f'{library}_cpu', [osp.dirname(__file__)])
    spec = cuda_spec or cpu_spec
    if spec is None:  # pragma: no cover
        raise ImportError(f"Could not find module '{library}_cpu' in "
                          f"{osp.dirname(__file__)}")
    torch.ops.load_library(spec.origin)

# Guard against mixing extension binaries built for a different CUDA
# major version than the installed PyTorch.
cuda_version = torch.ops.torch_cluster.cuda_version()
if torch.version.cuda is not None and cuda_version != -1:  # pragma: no cover
    # `cuda_version` is encoded as e.g. 11080 -> 11.8 or 9020 -> 9.2.
    version_str = str(cuda_version)
    if cuda_version < 10000:
        major, minor = int(version_str[0]), int(version_str[2])
    else:
        major, minor = int(version_str[0:2]), int(version_str[3])
    t_major, t_minor = [int(x) for x in torch.version.cuda.split('.')]

    if t_major != major:
        raise RuntimeError(
            f'Detected that PyTorch and torch_cluster were compiled with '
            f'different CUDA versions. PyTorch has CUDA version '
            f'{t_major}.{t_minor} and torch_cluster has CUDA version '
            f'{major}.{minor}. Please reinstall the torch_cluster that '
            f'matches your PyTorch install.')

from .fps import fps  # noqa
from .graclus import graclus_cluster  # noqa
from .grid import grid_cluster  # noqa
from .knn import knn, knn_graph  # noqa
from .nearest import nearest  # noqa
from .radius import radius, radius_graph  # noqa
from .rw import random_walk  # noqa
from .sampler import neighbor_sampler  # noqa

__all__ = [
    'graclus_cluster',
    'grid_cluster',
    'fps',
    'nearest',
    'knn',
    'knn_graph',
    'radius',
    'radius_graph',
    'random_walk',
    'neighbor_sampler',
    '__version__',
]
from typing import List, Optional, Union

import torch
from torch import Tensor

import torch_cluster.typing


@torch.jit._overload  # noqa
def fps(src, batch, ratio, random_start, batch_size, ptr):  # noqa
    # type: (Tensor, Optional[Tensor], Optional[float], bool, Optional[int], Optional[Tensor]) -> Tensor  # noqa
    pass  # pragma: no cover


@torch.jit._overload  # noqa
def fps(src, batch, ratio, random_start, batch_size, ptr):  # noqa
    # type: (Tensor, Optional[Tensor], Optional[Tensor], bool, Optional[int], Optional[Tensor]) -> Tensor  # noqa
    pass  # pragma: no cover


@torch.jit._overload  # noqa
def fps(src, batch, ratio, random_start, batch_size, ptr):  # noqa
    # type: (Tensor, Optional[Tensor], Optional[float], bool, Optional[int], Optional[List[int]]) -> Tensor  # noqa
    pass  # pragma: no cover


@torch.jit._overload  # noqa
def fps(src, batch, ratio, random_start, batch_size, ptr):  # noqa
    # type: (Tensor, Optional[Tensor], Optional[Tensor], bool, Optional[int], Optional[List[int]]) -> Tensor  # noqa
    pass  # pragma: no cover


def fps(  # noqa
    src: torch.Tensor,
    batch: Optional[Tensor] = None,
    ratio: Optional[Union[Tensor, float]] = None,
    random_start: bool = True,
    batch_size: Optional[int] = None,
    ptr: Optional[Union[Tensor, List[int]]] = None,
):
    r"""A sampling algorithm from the `"PointNet++: Deep Hierarchical Feature
    Learning on Point Sets in a Metric Space"
    <https://arxiv.org/abs/1706.02413>`_ paper, which iteratively samples the
    most distant point with regard to the rest points.

    Args:
        src (Tensor): Point feature matrix
            :math:`\mathbf{X} \in \mathbb{R}^{N \times F}`.
        batch (LongTensor, optional): Batch vector
            :math:`\mathbf{b} \in {\{ 0, \ldots, B-1\}}^N`, which assigns each
            node to a specific example. (default: :obj:`None`)
        ratio (float or Tensor, optional): Sampling ratio.
            (default: :obj:`0.5`)
        random_start (bool, optional): If set to :obj:`False`, use the first
            node in :math:`\mathbf{X}` as starting node.
            (default: :obj:`True`)
        batch_size (int, optional): The number of examples :math:`B`.
            Automatically calculated if not given. (default: :obj:`None`)
        ptr (torch.Tensor or [int], optional): If given, batch assignment will
            be determined based on boundaries in CSR representation, *e.g.*,
            :obj:`batch=[0,0,1,1,1,2]` translates to :obj:`ptr=[0,2,5,6]`.
            (default: :obj:`None`)

    :rtype: :class:`LongTensor`

    .. code-block:: python

        import torch
        from torch_cluster import fps

        src = torch.Tensor([[-1, -1], [-1, 1], [1, -1], [1, 1]])
        batch = torch.tensor([0, 0, 0, 0])
        index = fps(src, batch, ratio=0.5)
    """
    # Normalize `ratio` into a tensor `r` so the C++ operator receives a
    # uniform argument type (defaults to 0.5 when not given).
    r: Optional[Tensor] = None
    if ratio is None:
        r = torch.tensor(0.5, dtype=src.dtype, device=src.device)
    elif isinstance(ratio, float):
        r = torch.tensor(ratio, dtype=src.dtype, device=src.device)
    else:
        r = ratio
    assert r is not None

    if ptr is not None:
        # Fast path: a Python list of boundaries can be consumed directly by
        # `fps_ptr_list` when the extension supports it.
        if isinstance(ptr, list) and torch_cluster.typing.WITH_PTR_LIST:
            return torch.ops.torch_cluster.fps_ptr_list(
                src, ptr, r, random_start)

        if isinstance(ptr, list):
            return torch.ops.torch_cluster.fps(
                src, torch.tensor(ptr, device=src.device), r, random_start)
        else:
            return torch.ops.torch_cluster.fps(src, ptr, r, random_start)

    # No CSR pointer given: derive it from the (sorted) `batch` vector, or
    # treat the whole input as a single example.
    if batch is not None:
        assert src.size(0) == batch.numel()
        if batch_size is None:
            batch_size = int(batch.max()) + 1

        deg = src.new_zeros(batch_size, dtype=torch.long)
        deg.scatter_add_(0, batch, torch.ones_like(batch))

        ptr_vec = deg.new_zeros(batch_size + 1)
        torch.cumsum(deg, 0, out=ptr_vec[1:])
    else:
        ptr_vec = torch.tensor([0, src.size(0)], device=src.device)

    return torch.ops.torch_cluster.fps(src, ptr_vec, r, random_start)
perm = torch.argsort(row) row, col = row[perm], col[perm] if weight is not None: weight = weight[perm] deg = row.new_zeros(num_nodes) deg.scatter_add_(0, row, torch.ones_like(row)) rowptr = row.new_zeros(num_nodes + 1) torch.cumsum(deg, 0, out=rowptr[1:]) return torch.ops.torch_cluster.graclus(rowptr, col, weight) pytorch_cluster-1.6.3/torch_cluster/grid.py000066400000000000000000000020441451151623700211360ustar00rootroot00000000000000from typing import Optional import torch def grid_cluster( pos: torch.Tensor, size: torch.Tensor, start: Optional[torch.Tensor] = None, end: Optional[torch.Tensor] = None, ) -> torch.Tensor: """A clustering algorithm, which overlays a regular grid of user-defined size over a point cloud and clusters all points within a voxel. Args: pos (Tensor): D-dimensional position of points. size (Tensor): Size of a voxel in each dimension. start (Tensor, optional): Start position of the grid (in each dimension). (default: :obj:`None`) end (Tensor, optional): End position of the grid (in each dimension). (default: :obj:`None`) :rtype: :class:`LongTensor` .. code-block:: python import torch from torch_cluster import grid_cluster pos = torch.Tensor([[0, 0], [11, 9], [2, 8], [2, 2], [8, 3]]) size = torch.Tensor([5, 5]) cluster = grid_cluster(pos, size) """ return torch.ops.torch_cluster.grid(pos, size, start, end) pytorch_cluster-1.6.3/torch_cluster/knn.py000066400000000000000000000124751451151623700210100ustar00rootroot00000000000000from typing import Optional import torch def knn( x: torch.Tensor, y: torch.Tensor, k: int, batch_x: Optional[torch.Tensor] = None, batch_y: Optional[torch.Tensor] = None, cosine: bool = False, num_workers: int = 1, batch_size: Optional[int] = None, ) -> torch.Tensor: r"""Finds for each element in :obj:`y` the :obj:`k` nearest points in :obj:`x`. Args: x (Tensor): Node feature matrix :math:`\mathbf{X} \in \mathbb{R}^{N \times F}`. y (Tensor): Node feature matrix :math:`\mathbf{X} \in \mathbb{R}^{M \times F}`. 
def knn(
    x: torch.Tensor,
    y: torch.Tensor,
    k: int,
    batch_x: Optional[torch.Tensor] = None,
    batch_y: Optional[torch.Tensor] = None,
    cosine: bool = False,
    num_workers: int = 1,
    batch_size: Optional[int] = None,
) -> torch.Tensor:
    r"""Finds for each element in :obj:`y` the :obj:`k` nearest points in
    :obj:`x`.

    Args:
        x (Tensor): Node feature matrix
            :math:`\mathbf{X} \in \mathbb{R}^{N \times F}`.
        y (Tensor): Node feature matrix
            :math:`\mathbf{X} \in \mathbb{R}^{M \times F}`.
        k (int): The number of neighbors.
        batch_x (LongTensor, optional): Batch vector
            :math:`\mathbf{b} \in {\{ 0, \ldots, B-1\}}^N`, which assigns each
            node to a specific example. :obj:`batch_x` needs to be sorted.
            (default: :obj:`None`)
        batch_y (LongTensor, optional): Batch vector
            :math:`\mathbf{b} \in {\{ 0, \ldots, B-1\}}^M`, which assigns each
            node to a specific example. :obj:`batch_y` needs to be sorted.
            (default: :obj:`None`)
        cosine (boolean, optional): If :obj:`True`, will use the Cosine
            distance instead of the Euclidean distance to find nearest
            neighbors. (default: :obj:`False`)
        num_workers (int): Number of workers to use for computation. Has no
            effect in case :obj:`batch_x` or :obj:`batch_y` is not
            :obj:`None`, or the input lies on the GPU. (default: :obj:`1`)
        batch_size (int, optional): The number of examples :math:`B`.
            Automatically calculated if not given. (default: :obj:`None`)

    :rtype: :class:`LongTensor`

    .. code-block:: python

        import torch
        from torch_cluster import knn

        x = torch.Tensor([[-1, -1], [-1, 1], [1, -1], [1, 1]])
        batch_x = torch.tensor([0, 0, 0, 0])
        y = torch.Tensor([[-1, 0], [1, 0]])
        batch_y = torch.tensor([0, 0])
        assign_index = knn(x, y, 2, batch_x, batch_y)
    """
    # Nothing to match against / nothing to query: empty edge index.
    if x.numel() == 0 or y.numel() == 0:
        return torch.empty(2, 0, dtype=torch.long, device=x.device)

    # Promote 1-D inputs to feature matrices with a single feature.
    if x.dim() == 1:
        x = x.unsqueeze(-1)
    if y.dim() == 1:
        y = y.unsqueeze(-1)
    x, y = x.contiguous(), y.contiguous()

    # Infer the number of examples from the batch vectors if not given.
    if batch_size is None:
        batch_size = 1
        if batch_x is not None:
            assert x.size(0) == batch_x.numel()
            batch_size = int(batch_x.max()) + 1
        if batch_y is not None:
            assert y.size(0) == batch_y.numel()
            batch_size = max(batch_size, int(batch_y.max()) + 1)
    assert batch_size > 0

    # For batched input, translate the (sorted) batch vectors into CSR
    # pointers via searching the example boundaries.
    ptr_x: Optional[torch.Tensor] = None
    ptr_y: Optional[torch.Tensor] = None
    if batch_size > 1:
        assert batch_x is not None
        assert batch_y is not None
        boundaries = torch.arange(batch_size + 1, device=x.device)
        ptr_x = torch.bucketize(boundaries, batch_x)
        ptr_y = torch.bucketize(boundaries, batch_y)

    return torch.ops.torch_cluster.knn(x, y, ptr_x, ptr_y, k, cosine,
                                       num_workers)
code-block:: python import torch from torch_cluster import knn_graph x = torch.Tensor([[-1, -1], [-1, 1], [1, -1], [1, 1]]) batch = torch.tensor([0, 0, 0, 0]) edge_index = knn_graph(x, k=2, batch=batch, loop=False) """ assert flow in ['source_to_target', 'target_to_source'] edge_index = knn(x, x, k if loop else k + 1, batch, batch, cosine, num_workers, batch_size) if flow == 'source_to_target': row, col = edge_index[1], edge_index[0] else: row, col = edge_index[0], edge_index[1] if not loop: mask = row != col row, col = row[mask], col[mask] return torch.stack([row, col], dim=0) pytorch_cluster-1.6.3/torch_cluster/nearest.py000066400000000000000000000112201451151623700216460ustar00rootroot00000000000000from typing import Optional import scipy.cluster import torch def nearest( x: torch.Tensor, y: torch.Tensor, batch_x: Optional[torch.Tensor] = None, batch_y: Optional[torch.Tensor] = None, ) -> torch.Tensor: r"""Clusters points in :obj:`x` together which are nearest to a given query point in :obj:`y`. Args: x (Tensor): Node feature matrix :math:`\mathbf{X} \in \mathbb{R}^{N \times F}`. y (Tensor): Node feature matrix :math:`\mathbf{Y} \in \mathbb{R}^{M \times F}`. batch_x (LongTensor, optional): Batch vector :math:`\mathbf{b} \in {\{ 0, \ldots, B-1\}}^N`, which assigns each node to a specific example. :obj:`batch_x` needs to be sorted. (default: :obj:`None`) batch_y (LongTensor, optional): Batch vector :math:`\mathbf{b} \in {\{ 0, \ldots, B-1\}}^M`, which assigns each node to a specific example. :obj:`batch_y` needs to be sorted. (default: :obj:`None`) :rtype: :class:`LongTensor` .. 
def nearest(
    x: torch.Tensor,
    y: torch.Tensor,
    batch_x: Optional[torch.Tensor] = None,
    batch_y: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    r"""Clusters points in :obj:`x` together which are nearest to a given
    query point in :obj:`y`.

    Args:
        x (Tensor): Node feature matrix
            :math:`\mathbf{X} \in \mathbb{R}^{N \times F}`.
        y (Tensor): Node feature matrix
            :math:`\mathbf{Y} \in \mathbb{R}^{M \times F}`.
        batch_x (LongTensor, optional): Batch vector
            :math:`\mathbf{b} \in {\{ 0, \ldots, B-1\}}^N`, which assigns each
            node to a specific example. :obj:`batch_x` needs to be sorted.
            (default: :obj:`None`)
        batch_y (LongTensor, optional): Batch vector
            :math:`\mathbf{b} \in {\{ 0, \ldots, B-1\}}^M`, which assigns each
            node to a specific example. :obj:`batch_y` needs to be sorted.
            (default: :obj:`None`)

    :rtype: :class:`LongTensor`

    .. code-block:: python

        import torch
        from torch_cluster import nearest

        x = torch.Tensor([[-1, -1], [-1, 1], [1, -1], [1, 1]])
        batch_x = torch.tensor([0, 0, 0, 0])
        y = torch.Tensor([[-1, 0], [1, 0]])
        batch_y = torch.tensor([0, 0])
        cluster = nearest(x, y, batch_x, batch_y)
    """
    # Promote 1-D inputs to feature matrices with a single feature.
    if x.dim() == 1:
        x = x.view(-1, 1)
    if y.dim() == 1:
        y = y.view(-1, 1)
    assert x.size(1) == y.size(1)

    if batch_x is not None and (batch_x[1:] - batch_x[:-1] < 0).any():
        raise ValueError("'batch_x' is not sorted")
    if batch_y is not None and (batch_y[1:] - batch_y[:-1] < 0).any():
        raise ValueError("'batch_y' is not sorted")

    if x.is_cuda:
        # GPU path: build CSR pointers from the batch vectors and dispatch
        # to the CUDA kernel.
        if batch_x is not None:
            assert x.size(0) == batch_x.numel()
            batch_size = int(batch_x.max()) + 1
            deg = x.new_zeros(batch_size, dtype=torch.long)
            deg.scatter_add_(0, batch_x, torch.ones_like(batch_x))
            ptr_x = deg.new_zeros(batch_size + 1)
            torch.cumsum(deg, 0, out=ptr_x[1:])
        else:
            ptr_x = torch.tensor([0, x.size(0)], device=x.device)

        if batch_y is not None:
            assert y.size(0) == batch_y.numel()
            batch_size = int(batch_y.max()) + 1
            deg = y.new_zeros(batch_size, dtype=torch.long)
            deg.scatter_add_(0, batch_y, torch.ones_like(batch_y))
            ptr_y = deg.new_zeros(batch_size + 1)
            torch.cumsum(deg, 0, out=ptr_y[1:])
        else:
            ptr_y = torch.tensor([0, y.size(0)], device=y.device)

        # If an instance in `batch_x` is non-empty, it must be non-empty in
        # `batch_y` as well:
        if not torch.equal((ptr_x[1:] - ptr_x[:-1]) > 0,
                           (ptr_y[1:] - ptr_y[:-1]) > 0):
            raise ValueError("Some batch indices occur in 'batch_x' "
                             "that do not occur in 'batch_y'")

        return torch.ops.torch_cluster.nearest(x, y, ptr_x, ptr_y)

    # CPU path: delegate to scipy's vector quantization.
    if batch_x is None and batch_y is not None:
        batch_x = x.new_zeros(x.size(0), dtype=torch.long)
    if batch_y is None and batch_x is not None:
        batch_y = y.new_zeros(y.size(0), dtype=torch.long)

    if batch_x is not None and batch_y is not None:
        # If an instance in `batch_x` is non-empty, it must be non-empty in
        # `batch_y` as well:
        if not torch.equal(batch_x.unique_consecutive(),
                           batch_y.unique_consecutive()):
            raise ValueError("Some batch indices occur in 'batch_x' "
                             "that do not occur in 'batch_y'")

        assert x.dim() == 2 and batch_x.dim() == 1
        assert y.dim() == 2 and batch_y.dim() == 1
        assert x.size(0) == batch_x.size(0)
        assert y.size(0) == batch_y.size(0)

        # Translate and rescale x and y to [0, 1] ...
        min_xy = min(x.min().item(), y.min().item())
        x, y = x - min_xy, y - min_xy
        max_xy = max(x.max().item(), y.max().item())
        x = x / max_xy
        y = y / max_xy

        # ... then append a scaled batch column so that points of different
        # examples can never be each other's nearest neighbor.
        D = x.size(-1)
        x = torch.cat([x, 2 * D * batch_x.view(-1, 1).to(x.dtype)], -1)
        y = torch.cat([y, 2 * D * batch_y.view(-1, 1).to(y.dtype)], -1)

    codes = scipy.cluster.vq.vq(x.detach().cpu(), y.detach().cpu())[0]
    return torch.from_numpy(codes).to(torch.long)
(default: :obj:`None`) batch_y (LongTensor, optional): Batch vector :math:`\mathbf{b} \in {\{ 0, \ldots, B-1\}}^M`, which assigns each node to a specific example. :obj:`batch_y` needs to be sorted. (default: :obj:`None`) max_num_neighbors (int, optional): The maximum number of neighbors to return for each element in :obj:`y`. If the number of actual neighbors is greater than :obj:`max_num_neighbors`, returned neighbors are picked randomly. (default: :obj:`32`) num_workers (int): Number of workers to use for computation. Has no effect in case :obj:`batch_x` or :obj:`batch_y` is not :obj:`None`, or the input lies on the GPU. (default: :obj:`1`) batch_size (int, optional): The number of examples :math:`B`. Automatically calculated if not given. (default: :obj:`None`) .. code-block:: python import torch from torch_cluster import radius x = torch.Tensor([[-1, -1], [-1, 1], [1, -1], [1, 1]]) batch_x = torch.tensor([0, 0, 0, 0]) y = torch.Tensor([[-1, 0], [1, 0]]) batch_y = torch.tensor([0, 0]) assign_index = radius(x, y, 1.5, batch_x, batch_y) """ if x.numel() == 0 or y.numel() == 0: return torch.empty(2, 0, dtype=torch.long, device=x.device) x = x.view(-1, 1) if x.dim() == 1 else x y = y.view(-1, 1) if y.dim() == 1 else y x, y = x.contiguous(), y.contiguous() if batch_size is None: batch_size = 1 if batch_x is not None: assert x.size(0) == batch_x.numel() batch_size = int(batch_x.max()) + 1 if batch_y is not None: assert y.size(0) == batch_y.numel() batch_size = max(batch_size, int(batch_y.max()) + 1) assert batch_size > 0 ptr_x: Optional[torch.Tensor] = None ptr_y: Optional[torch.Tensor] = None if batch_size > 1: assert batch_x is not None assert batch_y is not None arange = torch.arange(batch_size + 1, device=x.device) ptr_x = torch.bucketize(arange, batch_x) ptr_y = torch.bucketize(arange, batch_y) return torch.ops.torch_cluster.radius(x, y, ptr_x, ptr_y, r, max_num_neighbors, num_workers) def radius_graph( x: torch.Tensor, r: float, batch: Optional[torch.Tensor] = 
def radius_graph(
    x: torch.Tensor,
    r: float,
    batch: Optional[torch.Tensor] = None,
    loop: bool = False,
    max_num_neighbors: int = 32,
    flow: str = 'source_to_target',
    num_workers: int = 1,
    batch_size: Optional[int] = None,
) -> torch.Tensor:
    r"""Computes graph edges to all points within a given distance.

    Args:
        x (Tensor): Node feature matrix
            :math:`\mathbf{X} \in \mathbb{R}^{N \times F}`.
        r (float): The radius.
        batch (LongTensor, optional): Batch vector
            :math:`\mathbf{b} \in {\{ 0, \ldots, B-1\}}^N`, which assigns each
            node to a specific example. :obj:`batch` needs to be sorted.
            (default: :obj:`None`)
        loop (bool, optional): If :obj:`True`, the graph will contain
            self-loops. (default: :obj:`False`)
        max_num_neighbors (int, optional): The maximum number of neighbors to
            return for each element. If the number of actual neighbors is
            greater than :obj:`max_num_neighbors`, returned neighbors are
            picked randomly. (default: :obj:`32`)
        flow (string, optional): The flow direction when used in combination
            with message passing (:obj:`"source_to_target"` or
            :obj:`"target_to_source"`). (default: :obj:`"source_to_target"`)
        num_workers (int): Number of workers to use for computation. Has no
            effect in case :obj:`batch` is not :obj:`None`, or the input lies
            on the GPU. (default: :obj:`1`)
        batch_size (int, optional): The number of examples :math:`B`.
            Automatically calculated if not given. (default: :obj:`None`)

    :rtype: :class:`LongTensor`

    .. code-block:: python

        import torch
        from torch_cluster import radius_graph

        x = torch.Tensor([[-1, -1], [-1, 1], [1, -1], [1, 1]])
        batch = torch.tensor([0, 0, 0, 0])
        edge_index = radius_graph(x, r=1.5, batch=batch, loop=False)
    """
    assert flow in ['source_to_target', 'target_to_source']

    # Allow one extra neighbor when self-loops are excluded, since every
    # point trivially lies within radius `r` of itself.
    limit = max_num_neighbors if loop else max_num_neighbors + 1
    edge_index = radius(x, x, r, batch, batch, limit, num_workers, batch_size)

    source, target = edge_index[0], edge_index[1]
    if flow == 'source_to_target':
        row, col = target, source
    else:
        row, col = source, target

    if not loop:
        keep = row != col
        row, col = row[keep], col[keep]

    return torch.stack([row, col], dim=0)
(default: :obj:`None`) return_edge_indices (bool, optional): Whether to additionally return the indices of edges traversed during the random walk. (default: :obj:`False`) :rtype: :class:`LongTensor` """ if num_nodes is None: num_nodes = max(int(row.max()), int(col.max()), int(start.max())) + 1 if coalesced: perm = torch.argsort(row * num_nodes + col) row, col = row[perm], col[perm] deg = row.new_zeros(num_nodes) deg.scatter_add_(0, row, torch.ones_like(row)) rowptr = row.new_zeros(num_nodes + 1) torch.cumsum(deg, 0, out=rowptr[1:]) node_seq, edge_seq = torch.ops.torch_cluster.random_walk( rowptr, col, start, walk_length, p, q) if return_edge_indices: return node_seq, edge_seq return node_seq pytorch_cluster-1.6.3/torch_cluster/sampler.py000066400000000000000000000006241451151623700216560ustar00rootroot00000000000000import torch def neighbor_sampler(start: torch.Tensor, rowptr: torch.Tensor, size: float): assert not start.is_cuda factor: float = -1. count: int = -1 if size <= 1: factor = size assert factor > 0 else: count = int(size) return torch.ops.torch_cluster.neighbor_sampler(start, rowptr, count, factor) pytorch_cluster-1.6.3/torch_cluster/testing.py000066400000000000000000000006631451151623700216730ustar00rootroot00000000000000from typing import Any import torch dtypes = [ torch.half, torch.bfloat16, torch.float, torch.double, torch.int, torch.long ] grad_dtypes = [torch.half, torch.float, torch.double] devices = [torch.device('cpu')] if torch.cuda.is_available(): devices += [torch.device('cuda:0')] def tensor(x: Any, dtype: torch.dtype, device: torch.device): return None if x is None else torch.tensor(x, dtype=dtype, device=device) pytorch_cluster-1.6.3/torch_cluster/typing.py000066400000000000000000000002041451151623700215170ustar00rootroot00000000000000import torch try: WITH_PTR_LIST = hasattr(torch.ops.torch_cluster, 'fps_ptr_list') except Exception: WITH_PTR_LIST = False