From 00d84bf753a0f4d79ea3025c04862d0308cbcb6b Mon Sep 17 00:00:00 2001 From: Paul Zander Date: Mon, 25 Aug 2025 21:14:03 +0200 Subject: [PATCH] cuda 13 Signed-off-by: Paul Zander diff --git a/ThirdParty/viskores/vtkviskores/viskores/viskores/cont/cuda/internal/CudaAllocator.cu b/ThirdParty/viskores/vtkviskores/viskores/viskores/cont/cuda/internal/CudaAllocator.cu index cc2da2b..ad382ee 100644 --- a/ThirdParty/viskores/vtkviskores/viskores/viskores/cont/cuda/internal/CudaAllocator.cu +++ b/ThirdParty/viskores/vtkviskores/viskores/viskores/cont/cuda/internal/CudaAllocator.cu @@ -284,10 +284,14 @@ void CudaAllocator::PrepareForControl(const void* ptr, std::size_t numBytes) { if (IsManagedPointer(ptr) && numBytes >= Threshold) { + // Create device location with specific device ID + cudaMemLocation hostLoc; + hostLoc.type = cudaMemLocationTypeHost; + // TODO these hints need to be benchmarked and adjusted once we start // sharing the pointers between cont/exec - VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, cudaCpuDeviceId)); - VISKORES_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, cudaCpuDeviceId, cudaStreamPerThread)); + VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, hostLoc)); + VISKORES_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, hostLoc, 0)); } } @@ -299,10 +303,15 @@ void CudaAllocator::PrepareForInput(const void* ptr, std::size_t numBytes) viskores::cont::RuntimeDeviceInformation() .GetRuntimeConfiguration(viskores::cont::DeviceAdapterTagCuda()) .GetDeviceInstance(dev); + + cudaMemLocation deviceLoc; + deviceLoc.type = cudaMemLocationTypeDevice; + deviceLoc.id = dev; + // VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetPreferredLocation, dev)); // VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetReadMostly, dev)); - VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, dev)); - VISKORES_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, dev, cudaStreamPerThread)); + VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, deviceLoc)); + VISKORES_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, deviceLoc, 0, cudaStreamPerThread)); } } @@ -314,10 +323,15 @@ void CudaAllocator::PrepareForOutput(const void* ptr, std::size_t numBytes) viskores::cont::RuntimeDeviceInformation() .GetRuntimeConfiguration(viskores::cont::DeviceAdapterTagCuda()) .GetDeviceInstance(dev); + + cudaMemLocation deviceLoc; + deviceLoc.type = cudaMemLocationTypeDevice; + deviceLoc.id = dev; + // VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetPreferredLocation, dev)); // VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseUnsetReadMostly, dev)); - VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, dev)); - VISKORES_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, dev, cudaStreamPerThread)); + VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, deviceLoc)); + VISKORES_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, deviceLoc, 0, cudaStreamPerThread)); } } @@ -329,10 +343,15 @@ void CudaAllocator::PrepareForInPlace(const void* ptr, std::size_t numBytes) viskores::cont::RuntimeDeviceInformation() .GetRuntimeConfiguration(viskores::cont::DeviceAdapterTagCuda()) .GetDeviceInstance(dev); + + cudaMemLocation deviceLoc; + deviceLoc.type = cudaMemLocationTypeDevice; + deviceLoc.id = dev; + // VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetPreferredLocation, dev)); // VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseUnsetReadMostly, dev)); - VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, dev)); - VISKORES_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, dev, cudaStreamPerThread)); + VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, deviceLoc)); + VISKORES_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, deviceLoc, 0, cudaStreamPerThread)); } } diff --git a/ThirdParty/viskores/vtkviskores/viskores/viskores/Swap.h b/ThirdParty/viskores/vtkviskores/viskores/viskores/Swap.h index 918075e..4d380db 100644 --- a/ThirdParty/viskores/vtkviskores/viskores/viskores/Swap.h +++ b/ThirdParty/viskores/vtkviskores/viskores/viskores/Swap.h @@ -41,7 +41,8 @@ namespace viskores // defined in the `viskores` namespace as an argument. If that function has an unqualified call to // `Swap`, it results in ADL being used, causing the templated functions `cub::Swap` and // `viskores::Swap` to conflict. -#if defined(VISKORES_CUDA_VERSION_MAJOR) && (VISKORES_CUDA_VERSION_MAJOR >= 12) +// This was deprecated in favour of `cuda::std::swap` in CUDA 13. +#if defined(VISKORES_CUDA_VERSION_MAJOR) && (VISKORES_CUDA_VERSION_MAJOR == 12) using cub::Swap; #else template -- 2.51.0