Tpetra range issues in Kokkos subview with new view
Created by: bathmatt
@mhoemmen @crtrott @hcedwar
I'm getting this error when I have bounds checking on:
Kokkos::subview bounds error
p=1 | *********** Caught Exception std::exception: Begin Error Report ***********
p=1 | /home/mbetten/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Distributor.hpp:2136:
p=1 |
p=1 | Throw number = 1
p=1 |
p=1 | Throw test that evaluated to true: static_cast<size_t> (imports.dimension_0 ()) < totalNumImportPackets
p=1 |
p=1 | Tpetra::Distributor::doPosts(3 args): The 'imports' array must have enough entries to hold the expected number of import packets. imports.dimension_0() = 0 < totalNumImportPackets = 63.
p=1 | ************ Caught Exception std::exception: End Error Report ************
--------------------------------------------------------------------------
mpirun noticed that process rank 0 with PID 5293 on node hansen02 exited on signal 11 (Segmentation fault).
I've run this code in serial and in parallel through valgrind, and it is clean. I've run it through cuda-memcheck in parallel, and it doesn't show anything except for the exception shown above. I've also run it through cuda-gdb in parallel; the tracebacks are attached below.
I think this is an issue with Tpetra now. This is on Hansen; I can make the build public.
On rank 1:
(cuda-gdb) where
#0 __cxxabiv1::__cxa_throw (obj=0x1f81f6c0,
tinfo=0x1d4b1b60 <_ZTISt13runtime_error@@GLIBCXX_3.4>,
dest=0x49b40c0 <_ZNSt13runtime_errorD1Ev@plt>)
at ../../.././libstdc++-v3/libsupc++/eh_throw.cc:62
#1 0x000000000843950e in Tpetra::Distributor::doPosts<Kokkos::Experimental::View<int const*, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Serial, Kokkos::CudaUVMSpace>, Kokkos::MemoryTraits<0u> >, Kokkos::Experimental::View<int*, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Serial, Kokkos::CudaUVMSpace> > > (this=
0x1f7a9610, exports=..., numPackets=3, imports=...)
at /home/mbetten/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Distributor.hpp:2131
#2 0x000000000842fb78 in Tpetra::Distributor::doPostsAndWaits<Kokkos::Experimental::View<int const*, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Serial, Kokkos::CudaUVMSpace>, Kokkos::MemoryTraits<0u> >, Kokkos::Experimental::View<int*, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Serial, Kokkos::CudaUVMSpace> > > (
this=0x1f7a9610, exports=..., numPackets=3, imports=...)
at /home/mbetten/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Distributor.hpp:2009
#3 0x000000000842b41a in Tpetra::Distributor::doReversePostsAndWaits<Kokkos::Experimental::View<int const*, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Cuda, Kokkos::CudaUVMSpace>, Kokkos::MemoryTraits<0u> >, Kokkos::Experimental::View<int*, Kokkos::Cuda> > (this=0x1f7a9610, exports=..., numPackets=3, imports=...)
at /home/mbetten/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Distributor.hpp:2803
#4 0x0000000008429bc2 in Tpetra::DistObject<int, int, int, Kokkos::Compat::KokkosDeviceWrapperNode<Kokkos::Cuda, Kokkos::CudaUVMSpace>, false>::doTransferNew
(this=0x1f7aea40, src=..., CM=Tpetra::REPLACE, numSameIDs=231,
permuteToLIDs_=..., permuteFromLIDs_=..., remoteLIDs_=...,
exportLIDs_=..., distor=...,
revOp=Tpetra::DistObject<int, int, int, Kokkos::Compat::KokkosDeviceWrapperNode<Kokkos::Cuda, Kokkos::CudaUVMSpace>, false>::DoReverse)
at /home/mbetten/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_DistObject_def.hpp:856
#5 0x0000000008428519 in Tpetra::DistObject<int, int, int, Kokkos::Compat::KokkosDeviceWrapperNode<Kokkos::Cuda, Kokkos::CudaUVMSpace>, false>::doTransfer (
this=0x1f7aea40, src=..., CM=Tpetra::REPLACE, numSameIDs=231,
permuteToLIDs_=..., permuteFromLIDs_=..., remoteLIDs_=...,
exportLIDs_=..., distor=...,
revOp=Tpetra::DistObject<int, int, int, Kokkos::Compat::KokkosDeviceWrapperNode<Kokkos::Cuda, Kokkos::CudaUVMSpace>, false>::DoReverse)
at /home/mbetten/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_DistObject_def.hpp:397
#6 0x00000000084270bd in Tpetra::DistObject<int, int, int, Kokkos::Compat::KokkosDeviceWrapperNode<Kokkos::Cuda, Kokkos::CudaUVMSpace>, false>::doImport (
this=0x1f7aea40, source=..., exporter=..., CM=Tpetra::REPLACE)
at /home/mbetten/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_DistObject_def.hpp:333
#7 0x0000000006b7b330 in panzer::DOFManager<int, int>::buildGlobalUnknowns (
this=0x1f7a3780, geomPattern=...)
at /home/mbetten/Trilinos/Trilinos/packages/panzer/dof-mgr/src/Panzer_DOFManager_impl.hpp:505
#8 0x0000000006b7a074 in panzer::DOFManager<int, int>::buildGlobalUnknowns (
this=0x1f7a3780)
at /home/mbetten/Trilinos/Trilinos/packages/panzer/dof-mgr/src/Panzer_DOFManager_impl.hpp:317
#9 0x00000000064ad919 in panzer::DOFManagerFactory<int, int>::buildUniqueGlobalIndexer<panzer::DOFManager<int, int> > (this=0x7fffffff6a80, mpiComm=...,
physicsBlocks=..., connMngr=..., fieldOrder=...)
at /home/mbetten/Trilinos/Trilinos/packages/panzer/disc-fe/src/Panzer_DOFManagerFactory_impl.hpp:148
#10 0x00000000064ac4ba in panzer::DOFManagerFactory<int, int>::buildUniqueGlobalIndexer (this=0x7fffffff6a80, mpiComm=..., physicsBlocks=..., connMngr=...,
fieldOrder=...)
at /home/mbetten/Trilinos/Trilinos/packages/panzer/disc-fe/src/Panzer_DOFManagerFactory_impl.hpp:66
#11 0x0000000004bef189 in drekar::UniqueGlobalIndexerFactory<int>::buildUniqueGlobalIndexer (this=0x7fffffff6cb0, mpiComm=..., physicsBlocks_exp=...,
On rank 0:
#1 0x00007fffeca13e15 in abort () from /lib64/libc.so.6
#2 0x0000000008d2e9c2 in Kokkos::Impl::host_abort (
message=0x9ec8f16 <Kokkos::Experimental::(anonymous namespace)::AllowPadding+25640> "Kokkos::subview bounds error")
at /home/mbetten/Trilinos/Trilinos/packages/kokkos/core/src/impl/Kokkos_Error.cpp:64
#3 0x0000000004ff6eca in Kokkos::abort (
message=0x9ec8f16 <Kokkos::Experimental::(anonymous namespace)::AllowPadding+25640> "Kokkos::subview bounds error")
at /home/mbetten/Trilinos/Trilinos/packages/kokkos/core/src/impl/Kokkos_Error.hpp:74
#4 0x0000000007ca65a2 in error<0ul, std::pair<unsigned long, unsigned long> >
(dim=..., this=0x7fffffff4630)
at /home/mbetten/Trilinos/Trilinos/packages/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp:552
#5 Kokkos::Experimental::Impl::SubviewExtents<1u, 1u>::SubviewExtents<0ul, std::pair<unsigned long, unsigned long> > (this=0x7fffffff4630, dim=...)
at /home/mbetten/Trilinos/Trilinos/packages/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp:587
#6 0x00000000084434ba in Kokkos::Experimental::Impl::ViewMapping<void, Kokkos::Experimental::ViewTraits<int const*, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Serial, Kokkos::CudaUVMSpace>, Kokkos::MemoryTraits<0u> >, std::pair<unsigned long, unsigned long> >::assign<Kokkos::Experimental::ViewTraits<int const*, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Serial, Kokkos::CudaUVMSpace>, Kokkos::MemoryTraits<0u> > >(Kokkos::Experimental::Impl::ViewMapping<Kokkos::Experimental::ViewTraits<int const*, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Serial, Kokkos::CudaUVMSpace>, Kokkos::MemoryTraits<0u> ><void> >&, Kokkos::Experimental::Impl::ViewMapping<Kokkos::Experimental::ViewTraits<int const*, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Serial, Kokkos::CudaUVMSpace>, Kokkos::MemoryTraits<0u> ><void> > const&, std::pair<unsigned long, unsigned long>) (dst=..., src=...,
args#0=...)
at /home/mbetten/Trilinos/Trilinos/packages/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp:2788
#7 0x00000000084417c5 in Kokkos::Experimental::View<int const*, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Serial, Kokkos::CudaUVMSpace>, Kokkos::MemoryTraits<0u> >::View<int const*<Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Serial, Kokkos::CudaUVMSpace>, Kokkos::MemoryTraits<0u> >, std::pair<unsigned long, unsigned long>> (this=0x7fffffff49e0, src_view=..., arg0=...)
at /home/mbetten/Trilinos/Trilinos/packages/kokkos/core/src/KokkosExp_View.hpp:1160
#8 0x000000000843f2bf in Kokkos::Experimental::subview<int const*, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Serial, Kokkos::CudaUVMSpace>, Kokkos::MemoryTraits<0u>, std::pair<unsigned long, unsigned long> > (src=...)
at /home/mbetten/Trilinos/Trilinos/packages/kokkos/core/src/KokkosExp_View.hpp:1462
---Type <return> to continue, or q <return> to quit---
#9 0x000000000843d56e in Kokkos::Compat::subview_offset<Kokkos::Experimental::View<int const*, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Serial, Kokkos::CudaUVMSpace>, Kokkos::MemoryTraits<0u> >, unsigned long> (view=..., offset=0,
size=63)
at /home/mbetten/Trilinos/Trilinos/packages/teuchos/kokkoscompat/src/KokkosCompat_View.hpp:221
#10 0x0000000008439d5d in Tpetra::Distributor::doPosts<Kokkos::Experimental::View<int const*, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Serial, Kokkos::CudaUVMSpace>, Kokkos::MemoryTraits<0u> >, Kokkos::Experimental::View<int*, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Serial, Kokkos::CudaUVMSpace> > > (this=
0x2b881130, exports=..., numPackets=3, imports=...)
at /home/mbetten/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Distributor.hpp:2263
#11 0x000000000842fb78 in Tpetra::Distributor::doPostsAndWaits<Kokkos::Experimental::View<int const*, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Serial, Kokkos::CudaUVMSpace>, Kokkos::MemoryTraits<0u> >, Kokkos::Experimental::View<int*, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Serial, Kokkos::CudaUVMSpace> > > (
this=0x2b881130, exports=..., numPackets=3, imports=...)
at /home/mbetten/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Distributor.hpp:2009
#12 0x000000000842b41a in Tpetra::Distributor::doReversePostsAndWaits<Kokkos::Experimental::View<int const*, Kokkos::LayoutLeft, Kokkos::Device<Kokkos::Cuda, Kokkos::CudaUVMSpace>, Kokkos::MemoryTraits<0u> >, Kokkos::Experimental::View<int*, Kokkos::Cuda> > (this=0x2b881130, exports=..., numPackets=3, imports=...)
at /home/mbetten/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Distributor.hpp:2803
#13 0x0000000008429bc2 in Tpetra::DistObject<int, int, int, Kokkos::Compat::KokkosDeviceWrapperNode<Kokkos::Cuda, Kokkos::CudaUVMSpace>, false>::doTransferNew
(this=0x2b875ec0, src=..., CM=Tpetra::REPLACE, numSameIDs=10,
permuteToLIDs_=..., permuteFromLIDs_=..., remoteLIDs_=...,
exportLIDs_=..., distor=...,
revOp=Tpetra::DistObject<int, int, int, Kokkos::Compat::KokkosDeviceWrapperNode<Kokkos::Cuda, Kokkos::CudaUVMSpace>, false>::DoReverse)
at /home/mbetten/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_DistObject_def.hpp:856
#14 0x0000000008428519 in Tpetra::DistObject<int, int, int, Kokkos::Compat::KokkosDeviceWrapperNode<Kokkos::Cuda, Kokkos::CudaUVMSpace>, false>::doTransfer (
this=0x2b875ec0, src=..., CM=Tpetra::REPLACE, numSameIDs=10,
permuteToLIDs_=..., permuteFromLIDs_=..., remoteLIDs_=...,
exportLIDs_=..., distor=...,
revOp=Tpetra::DistObject<int, int, int, Kokkos::Compat::KokkosDeviceWrapperNode<Kokkos::Cuda, Kokkos::CudaUVMSpace>, false>::DoReverse)
at /home/mbetten/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_DistObject_def.hpp:397
#15 0x00000000084270bd in Tpetra::DistObject<int, int, int, Kokkos::Compat::KokkosDeviceWrapperNode<Kokkos::Cuda, Kokkos::CudaUVMSpace>, false>::doImport (
---Type <return> to continue, or q <return> to quit---
this=0x2b875ec0, source=..., exporter=..., CM=Tpetra::REPLACE)
at /home/mbetten/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_DistObject_def.hpp:333
#16 0x0000000006b7b330 in panzer::DOFManager<int, int>::buildGlobalUnknowns (
this=0x2b87f8c0, geomPattern=...)
at /home/mbetten/Trilinos/Trilinos/packages/panzer/dof-mgr/src/Panzer_DOFManager_impl.hpp:505
#17 0x0000000006b7a074 in panzer::DOFManager<int, int>::buildGlobalUnknowns (
this=0x2b87f8c0)
at /home/mbetten/Trilinos/Trilinos/packages/panzer/dof-mgr/src/Panzer_DOFManager_impl.hpp:317
#18 0x00000000064ad919 in panzer::DOFManagerFactory<int, int>::buildUniqueGlobalIndexer<panzer::DOFManager<int, int> > (this=0x7fffffff6a90, mpiComm=...,
physicsBlocks=..., connMngr=..., fieldOrder=...)
at /home/mbetten/Trilinos/Trilinos/packages/panzer/disc-fe/src/Panzer_DOFManagerFactory_impl.hpp:148
#19 0x00000000064ac4ba in panzer::DOFManagerFactory<int, int>::buildUniqueGlobalIndexer (this=0x7fffffff6a90, mpiComm=..., physicsBlocks=..., connMngr=...,
fieldOrder=...)
at /home/mbetten/Trilinos/Trilinos/packages/panzer/disc-fe/src/Panzer_DOFManagerFactory_impl.hpp:66
#20 0x0000000004bef189 in drekar::UniqueGlobalIndexerFactory<int>::buildUniqueGlobalIndexer (this=0x7fffffff6cc0, mpiComm=..., physicsBlocks_exp=...,