Commit e3b413a5 authored by Kyungjoo Kim's avatar Kyungjoo Kim
Browse files

Ifpack2 - block tridiag - prefetch async does not work on k80

  Anyway, prefetching is a temporary solution to get memory locality.
  Once we use CUDA space only, there is no reason to use prefetching.
parent e19ba68f
......@@ -2088,19 +2088,6 @@ namespace KB = KokkosBatched::Experimental;
const local_ordinal_type per_team_scratch = internal_vector_scratch_type_3d_view::
shmem_size(blocksize, blocksize, vector_loop_size);
// when tpetra memory space is uvm, prefetch them to device or host accordingly
if (std::is_same<typename impl_type::node_memory_space,Kokkos::CudaUVMSpace>::value) {
if (std::is_same<execution_space,Kokkos::Cuda>::value) {
CUDA_SAFE_CALL(cudaMemPrefetchAsync(, A_rowptr.span()*sizeof(a_rowptr_value_type), 0));
CUDA_SAFE_CALL(cudaMemPrefetchAsync(, A_values.span()*sizeof(impl_scalar_type ), 0));
} else {
CUDA_SAFE_CALL(cudaMemPrefetchAsync(, A_rowptr.span()*sizeof(a_rowptr_value_type), cudaCpuDeviceId));
CUDA_SAFE_CALL(cudaMemPrefetchAsync(, A_values.span()*sizeof(impl_scalar_type ), cudaCpuDeviceId));
policy(packptr.extent(0)-1, team_size, vector_loop_size);
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment