Commit 04e6cd5d authored by Carter Edwards's avatar Carter Edwards
Browse files

Kokkos: Add 'kokkos/example/' subpackage with thread-scalable generation of a...

Kokkos: Add 'kokkos/example/' subpackage with thread-scalable generation of a box element mesh fixture.

Kokkos: Add the 'ALL' argument for a subview to specify an entire range of a dimension.
        This will lead to consistency of subviews when prior subview methods are removed.
parent 046878e7
......@@ -181,6 +181,20 @@
</EmailAddresses>
<ParentPackage value="Kokkos"/>
</Package>
<Package name="KokkosExample" dir="packages/kokkos/example" type="EX">
<LIB_REQUIRED_DEP_PACKAGES value="KokkosCore"/>
<LIB_OPTIONAL_DEP_PACKAGES/>
<TEST_REQUIRED_DEP_PACKAGES value="KokkosCore"/>
<TEST_OPTIONAL_DEP_PACKAGES/>
<LIB_REQUIRED_DEP_TPLS/>
<LIB_OPTIONAL_DEP_TPLS/>
<TEST_REQUIRED_DEP_TPLS/>
<TEST_OPTIONAL_DEP_TPLS value="CUSPARSE,MKL"/>
<EmailAddresses>
<Regression address="kokkos-regression@software.sandia.gov"/>
</EmailAddresses>
<ParentPackage value="Kokkos"/>
</Package>
<Package name="KokkosCompat" dir="packages/kokkos/compat" type="EX">
<LIB_REQUIRED_DEP_PACKAGES value="KokkosCore,KokkosClassic,Teuchos"/>
<LIB_OPTIONAL_DEP_PACKAGES value="ThreadPool"/>
......@@ -197,7 +211,7 @@
</Package>
<Package name="Kokkos" dir="packages/kokkos" type="PS">
<LIB_REQUIRED_DEP_PACKAGES/>
<LIB_OPTIONAL_DEP_PACKAGES value="KokkosClassic,KokkosCore,KokkosContainers,KokkosCompat"/>
<LIB_OPTIONAL_DEP_PACKAGES value="KokkosClassic,KokkosCore,KokkosContainers,KokkosExample,KokkosCompat"/>
<TEST_REQUIRED_DEP_PACKAGES/>
<TEST_OPTIONAL_DEP_PACKAGES/>
<LIB_REQUIRED_DEP_TPLS/>
......
......@@ -3,6 +3,7 @@ SET(SUBPACKAGES_DIRS_CLASSIFICATIONS_OPTREQS
Classic classic PS OPTIONAL
Core core EX OPTIONAL
Containers containers EX OPTIONAL
Example example EX OPTIONAL
Compat compat EX OPTIONAL
)
......
......@@ -565,6 +565,8 @@ void resize( View<T,L,D,M,S> & v ,
//----------------------------------------------------------------------------
/** \brief  Empty tag type: pass an ALL object as a subview argument to specify
 *          the entire range of that dimension.
 */
struct ALL {};
template< class DstViewType ,
class T , class L , class D , class M , class S ,
class ArgType0 >
......
......@@ -157,7 +157,7 @@ public:
#pragma simd vectorlength(work_align)
#pragma ivdep
for ( size_type iwork = range.first ; iwork < range.second ; ++iwork ) {
functor( iwork , functor.reference( thread.reduce_data() , count * ( iwork & work_mask ) ) );
functor( iwork , ReduceOp::reference( thread.reduce_data() , count * ( iwork & work_mask ) ) );
}
for ( size_type j = 1 ; j < work_align ; ++j ) {
......
......@@ -361,13 +361,19 @@ void * ThreadsExec::get_shmem( const int size )
namespace Kokkos {
namespace Impl {
/** \brief  Throw a runtime exception unless the caller is allowed to proceed.
 *
 *  \param name         Text placed at the front of the exception message.
 *  \param initialized  When true, additionally require 0 != s_threads_count.
 *
 *  Two checks are made, in this order:
 *    1. is_process() must be true, otherwise the call is reported as coming
 *       from a worker thread.
 *    2. If 'initialized' is true, s_threads_count must be non-zero, otherwise
 *       the threads are reported as not initialized.
 *
 *  NOTE(review): two signature lines follow and only the second declares
 *  'initialized', which the body reads - presumably the first is the
 *  superseded one-argument form; confirm.
 */
void ThreadsExec::verify_is_process( const std::string & name )
void ThreadsExec::verify_is_process( const std::string & name , const bool initialized )
{
// Check 1: reject callers for which is_process() is false.
if ( ! is_process() ) {
std::string msg( name );
msg.append( " FAILED : Called by a worker thread, can only be called by the master process." );
Kokkos::Impl::throw_runtime_exception( msg );
}
// Check 2: only enforced when the caller asks for an initialized state.
if ( initialized && 0 == s_threads_count ) {
std::string msg( name );
msg.append( " FAILED : Threads not initialized." );
Kokkos::Impl::throw_runtime_exception( msg );
}
}
// Wait for root thread to become inactive
......@@ -397,7 +403,7 @@ void ThreadsExec::activate_threads()
/** \brief Begin execution of the asynchronous functor */
void ThreadsExec::start( void (*func)( ThreadsExec & , const void * ) , const void * arg )
{
verify_is_process("ThreadsExec::start");
verify_is_process("ThreadsExec::start" , true );
if ( s_current_function_lock &&
s_current_function_lock != arg ) {
......@@ -419,6 +425,8 @@ void ThreadsExec::start( void (*func)( ThreadsExec & , const void * ) , const vo
void * ThreadsExec::execute( void (*func)( ThreadsExec & , const void * ) , const void * arg )
{
verify_is_process("ThreadsExec::execute" , true );
if ( s_current_function || s_current_function_arg ) {
Kokkos::Impl::throw_runtime_exception( std::string( "ThreadsExec::execute() FAILED : already executing" ) );
}
......@@ -449,7 +457,7 @@ void * ThreadsExec::execute( void (*func)( ThreadsExec & , const void * ) , cons
bool ThreadsExec::sleep()
{
verify_is_process("ThreadsExec::sleep");
verify_is_process("ThreadsExec::sleep", true );
if ( & execute_sleep == s_current_function ) return false ;
......@@ -468,7 +476,7 @@ bool ThreadsExec::sleep()
bool ThreadsExec::wake()
{
verify_is_process("ThreadsExec::wake");
verify_is_process("ThreadsExec::wake", true );
if ( & execute_sleep != s_current_function ) return false ;
......@@ -561,7 +569,7 @@ void ThreadsExec::resize_shared_scratch( size_t size )
void ThreadsExec::print_configuration( std::ostream & s , const bool detail )
{
verify_is_process("ThreadsExec::print_configuration");
verify_is_process("ThreadsExec::print_configuration",false);
fence();
......@@ -616,7 +624,7 @@ void ThreadsExec::initialize(
{
static const Sentinel sentinel ;
verify_is_process("ThreadsExec::initialize");
verify_is_process("ThreadsExec::initialize",false);
std::ostringstream msg ;
......@@ -792,7 +800,7 @@ void ThreadsExec::initialize(
void ThreadsExec::finalize()
{
verify_is_process("ThreadsExec::finalize");
verify_is_process("ThreadsExec::finalize",false);
fence();
......
......@@ -119,7 +119,7 @@ public:
static bool is_process();
static void verify_is_process( const std::string & );
static void verify_is_process( const std::string & , const bool initialized );
static void initialize( const std::pair<unsigned,unsigned> team_topo ,
std::pair<unsigned,unsigned> core_topo );
......
......@@ -514,6 +514,66 @@ struct ViewAssignment< LayoutDefault , LayoutDefault , void >
ViewTracking< traits_type >::increment( dst.m_ptr_on_device );
}
//------------------------------------
/** \brief Extract Rank-1 array from LayoutLeft Rank-2 array.
 *
 *  Argument pattern is ( dst , src , ALL , i1 ): the ALL tag occupies the
 *  first index position and i1 fixes the second index.
 *
 *  The overload participates only when all of the following hold at compile time:
 *    - the destination traits are assignable from the source traits,
 *    - the source array_layout is LayoutLeft,
 *    - the source rank is 2,
 *    - the destination rank is 1 and its single dimension is dynamic.
 *
 *  \param dst  View that is re-pointed; its extent N0 becomes the source's N0.
 *  \param src  Rank-2 source view.
 *  \param i1   Second index to extract; no bounds check is performed here.
 */
template< class DT , class DL , class DD , class DM ,
class ST , class SL , class SD , class SM >
KOKKOS_INLINE_FUNCTION
ViewAssignment( View<DT,DL,DD,DM,LayoutDefault> & dst ,
const View<ST,SL,SD,SM,LayoutDefault> & src ,
const ALL & ,
const typename enable_if< (
ViewAssignable< ViewTraits<DT,DL,DD,DM> , ViewTraits<ST,SL,SD,SM> >::assignable_value
&&
is_same< typename ViewTraits<ST,SL,SD,SM>::array_layout , LayoutLeft >::value
&&
( ViewTraits<ST,SL,SD,SM>::rank == 2 )
&&
( ViewTraits<DT,DL,DD,DM>::rank == 1 )
&&
( ViewTraits<DT,DL,DD,DM>::rank_dynamic == 1 )
), unsigned >::type i1 )
{
typedef ViewTraits<DT,DL,DD,DM> traits_type ;
// Release tracking of whatever dst pointed to before it is overwritten.
ViewTracking< traits_type >::decrement( dst.m_ptr_on_device );
dst.m_shape.N0 = src.m_shape.N0 ;
// Offset by i1 strides; presumably m_stride.value is the distance between
// consecutive values of the second index - TODO confirm.
dst.m_ptr_on_device = src.m_ptr_on_device + src.m_stride.value * i1 ;
// Register tracking for the newly assigned pointer.
ViewTracking< traits_type >::increment( dst.m_ptr_on_device );
}
//------------------------------------
/** \brief Extract Rank-1 array from LayoutRight Rank-2 array.
 *
 *  Argument pattern is ( dst , src , i0 , ALL ): i0 fixes the first index
 *  and the ALL tag occupies the second index position.
 *
 *  The overload participates only when all of the following hold at compile time:
 *    - the destination traits are assignable from the source traits,
 *    - the source array_layout is LayoutRight,
 *    - the source rank is 2,
 *    - the destination rank is 1 and its single dimension is dynamic.
 *
 *  \param dst  View that is re-pointed; its extent N0 becomes the source's N1.
 *  \param src  Rank-2 source view.
 *  \param i0   First index to extract; no bounds check is performed here.
 */
template< class DT , class DL , class DD , class DM ,
class ST , class SL , class SD , class SM >
KOKKOS_INLINE_FUNCTION
ViewAssignment( View<DT,DL,DD,DM,LayoutDefault> & dst ,
const View<ST,SL,SD,SM,LayoutDefault> & src ,
const typename enable_if< (
ViewAssignable< ViewTraits<DT,DL,DD,DM> , ViewTraits<ST,SL,SD,SM> >::assignable_value
&&
is_same< typename ViewTraits<ST,SL,SD,SM>::array_layout , LayoutRight >::value
&&
( ViewTraits<ST,SL,SD,SM>::rank == 2 )
&&
( ViewTraits<DT,DL,DD,DM>::rank == 1 )
&&
( ViewTraits<DT,DL,DD,DM>::rank_dynamic == 1 )
), unsigned >::type i0 ,
const ALL & )
{
typedef ViewTraits<DT,DL,DD,DM> traits_type ;
// Release tracking of whatever dst pointed to before it is overwritten.
ViewTracking< traits_type >::decrement( dst.m_ptr_on_device );
// The extracted array runs over the source's second dimension.
dst.m_shape.N0 = src.m_shape.N1 ;
// Offset by i0 strides; presumably m_stride.value is the distance between
// consecutive values of the first index - TODO confirm.
dst.m_ptr_on_device = src.m_ptr_on_device + src.m_stride.value * i0 ;
// Register tracking for the newly assigned pointer.
ViewTracking< traits_type >::increment( dst.m_ptr_on_device );
}
//------------------------------------
/** \brief Extract LayoutRight Rank-N array from range of LayoutRight Rank-N array */
template< class DT , class DL , class DD , class DM ,
......
INCLUDE(TribitsSubPackageMacros)
TRIBITS_SUBPACKAGE(Example)
TRIBITS_ADD_EXAMPLE_DIRECTORIES(query_device fixture)
TRIBITS_SUBPACKAGE_POSTPROCESS()
SET(LIB_REQUIRED_DEP_PACKAGES KokkosCore)
SET(LIB_OPTIONAL_DEP_PACKAGES)
SET(TEST_REQUIRED_DEP_PACKAGES KokkosCore)
SET(TEST_OPTIONAL_DEP_PACKAGES)
SET(LIB_REQUIRED_DEP_TPLS)
SET(LIB_OPTIONAL_DEP_TPLS)
SET(TEST_REQUIRED_DEP_TPLS )
SET(TEST_OPTIONAL_DEP_TPLS CUSPARSE MKL )
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXAMPLE_BOXELEMFIXTURE_HPP
#define KOKKOS_EXAMPLE_BOXELEMFIXTURE_HPP
#include <utility>
#include <Kokkos_View.hpp>
#include <Kokkos_Parallel.hpp>
#include <HexElement.hpp>
#include <BoxElemPart.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
/** \brief  Mesh fixture built from a BoxElemPart decomposition.
 *
 *  The fixture owns five Views whose leading dimensions are sized from the
 *  BoxElemPart member, and it is its own parallel_for functor: the main
 *  constructor launches one parallel_for over a single flattened index range
 *  and operator() fills whichever View entry that index maps to.
 *
 *  \tparam Device  Device type of the Views; also exported as device_type.
 *  \tparam Order   Element order; selects the number of nodes per element.
 */
template< class Device , BoxElemPart::ElemOrder Order >
class BoxElemFixture {
public:

  /** \brief  Nodes per element: 8 for ElemLinear, 27 for ElemQuadratic, otherwise 0. */
  enum { ElemNode = ( Order == BoxElemPart::ElemLinear )    ?  8 :
                    ( Order == BoxElemPart::ElemQuadratic ) ? 27 : 0 };

private:

  typedef Kokkos::Example::HexElement_TensorData< ElemNode > hex_data ;

  // Declaration order matters: m_box_part is constructed first and the main
  // constructor sizes every View from it.
  Kokkos::Example::BoxElemPart                   m_box_part ;
  Kokkos::View< unsigned*[3] ,        Device >   m_node_grid ;
  Kokkos::View< unsigned*[ElemNode] , Device >   m_elem_node ;
  Kokkos::View< unsigned*[2] ,        Device >   m_recv_node ;
  Kokkos::View< unsigned*[2] ,        Device >   m_send_node ;
  Kokkos::View< unsigned* ,           Device >   m_send_node_id ;

  // Per-node triple that operator() adds to an element coordinate to obtain
  // a node coordinate; the fourth entry of each row is always written as zero.
  unsigned char m_elem_node_local[ ElemNode ][4] ;

  /** \brief  Copy the first three entries of every row of rhs' local-node
   *          table and zero the fourth entry.
   */
  KOKKOS_INLINE_FUNCTION
  void copy_elem_node_local( const BoxElemFixture & rhs )
  {
    for ( unsigned inode = 0 ; inode < ElemNode ; ++inode ) {
      for ( unsigned k = 0 ; k < 3 ; ++k ) {
        m_elem_node_local[inode][k] = rhs.m_elem_node_local[inode][k] ;
      }
      m_elem_node_local[inode][3] = 0 ;
    }
  }

public:

  typedef Device device_type ;

  // Read-only, unmanaged view types handed out by the accessors below.
  typedef Kokkos::View< const unsigned * [ElemNode] , Device , Kokkos::MemoryUnmanaged > elem_node_type ;
  typedef Kokkos::View< const unsigned * [3] , Device , Kokkos::MemoryUnmanaged > node_grid_type ;
  typedef Kokkos::View< const unsigned * [2] , Device , Kokkos::MemoryUnmanaged > comm_list_type ;
  typedef Kokkos::View< const unsigned * , Device , Kokkos::MemoryUnmanaged > send_nodeid_type ;

  //------------------------------------
  // Single-entry queries:

  /** \brief  Entry k of the local-node table row for node 'inode'. */
  KOKKOS_INLINE_FUNCTION
  unsigned elem_node_local( unsigned inode , unsigned k ) const
    {
      return m_elem_node_local[inode][k] ;
    }

  /** \brief  Entry (inode,iaxis) of the node grid-coordinate view. */
  KOKKOS_INLINE_FUNCTION
  unsigned node_grid( unsigned inode , unsigned iaxis ) const
    {
      return m_node_grid(inode,iaxis);
    }

  /** \brief  Entry (ielem,inode) of the element-to-node view. */
  KOKKOS_INLINE_FUNCTION
  unsigned elem_node( unsigned ielem , unsigned inode ) const
    {
      return m_elem_node(ielem,inode);
    }

  //------------------------------------
  // Whole-view queries, each returned as a const unmanaged view:

  elem_node_type   elem_node()   const { return m_elem_node ; }
  node_grid_type   node_grid()   const { return m_node_grid ; }
  comm_list_type   recv_node()   const { return m_recv_node ; }
  comm_list_type   send_node()   const { return m_send_node ; }
  send_nodeid_type send_nodeid() const { return m_send_node_id ; }

  //------------------------------------

  /** \brief  Copy construct: copies the partition, the views, and the local-node table. */
  KOKKOS_INLINE_FUNCTION
  BoxElemFixture( const BoxElemFixture & rhs )
    : m_box_part(     rhs.m_box_part )
    , m_node_grid(    rhs.m_node_grid )
    , m_elem_node(    rhs.m_elem_node )
    , m_recv_node(    rhs.m_recv_node )
    , m_send_node(    rhs.m_send_node )
    , m_send_node_id( rhs.m_send_node_id )
    {
      copy_elem_node_local( rhs );
    }

  /** \brief  Assign: same member-by-member copy as the copy constructor. */
  BoxElemFixture & operator = ( const BoxElemFixture & rhs )
    {
      m_box_part     = rhs.m_box_part ;
      m_node_grid    = rhs.m_node_grid ;
      m_elem_node    = rhs.m_elem_node ;
      m_recv_node    = rhs.m_recv_node ;
      m_send_node    = rhs.m_send_node ;
      m_send_node_id = rhs.m_send_node_id ;

      copy_elem_node_local( rhs );

      return *this ;
    }

  /** \brief  Build the partition, allocate the views, and fill them in parallel.
   *
   *  All arguments after 'Order' are forwarded unchanged to the BoxElemPart
   *  constructor. The views are then allocated with leading dimensions taken
   *  from that partition, the local-node table is loaded from
   *  HexElement_TensorData::eval_map, and a single parallel_for over the total
   *  number of view entries to fill is launched with a copy of this object
   *  as the functor.
   */
  BoxElemFixture( const BoxElemPart::Decompose decompose ,
                  const unsigned global_size ,
                  const unsigned global_rank ,
                  const unsigned elem_nx ,
                  const unsigned elem_ny ,
                  const unsigned elem_nz )
    : m_box_part( Order , decompose , global_size , global_rank , elem_nx , elem_ny , elem_nz )
    , m_node_grid(    "fixture_node_grid" ,    m_box_part.uses_node_count() )
    , m_elem_node(    "fixture_elem_node" ,    m_box_part.uses_elem_count() )
    , m_recv_node(    "fixture_recv_node" ,    m_box_part.recv_node_msg_count() )
    , m_send_node(    "fixture_send_node" ,    m_box_part.send_node_msg_count() )
    , m_send_node_id( "fixture_send_node_id" , m_box_part.send_node_id_count() )
    {
      {
        // Scoped so the tensor data object is gone before the parallel launch.
        const hex_data elem_data ;

        for ( unsigned inode = 0 ; inode < ElemNode ; ++inode ) {
          for ( unsigned k = 0 ; k < 3 ; ++k ) {
            m_elem_node_local[inode][k] = elem_data.eval_map[inode][k] ;
          }
          m_elem_node_local[inode][3] = 0 ;
        }
      }

      // One work item per entry that operator() fills; the segment order
      // here must match the order in which operator() peels segments off.
      const size_t work_count = m_recv_node.dimension_0()
                              + m_send_node.dimension_0()
                              + m_send_node_id.dimension_0()
                              + m_node_grid.dimension_0()
                              + m_elem_node.dimension_0() * m_elem_node.dimension_1() ;

      Kokkos::parallel_for( work_count , *this );
    }

  //------------------------------------
  // Initialization functor:

  /** \brief  Fill the view entry that flattened work index 'i' maps to.
   *
   *  The index range is the concatenation of five segments, tested in order:
   *  receive messages, send messages, send node ids, node grid rows, and
   *  finally (element,node) pairs. Each time a segment does not contain 'i',
   *  its length is subtracted so 'i' becomes local to the next segment.
   */
  KOKKOS_INLINE_FUNCTION
  void operator()( size_t i ) const
    {
      // Segment 1: one receive message per index.
      if ( i < m_recv_node.dimension_0() ) {
        m_recv_node(i,0) = m_box_part.recv_node_rank(i);
        m_recv_node(i,1) = m_box_part.recv_node_count(i);
        return ;
      }
      i -= m_recv_node.dimension_0();

      // Segment 2: one send message per index.
      if ( i < m_send_node.dimension_0() ) {
        m_send_node(i,0) = m_box_part.send_node_rank(i);
        m_send_node(i,1) = m_box_part.send_node_count(i);
        return ;
      }
      i -= m_send_node.dimension_0();

      // Segment 3: one send node id per index.
      if ( i < m_send_node_id.dimension_0() ) {
        m_send_node_id(i) = m_box_part.send_node_id(i);
        return ;
      }
      i -= m_send_node_id.dimension_0();

      // Segment 4: one node grid row per index.
      if ( i < m_node_grid.dimension_0() ) {
        unsigned coord[3] ;
        m_box_part.local_node_coord( i , coord );
        for ( unsigned k = 0 ; k < 3 ; ++k ) {
          m_node_grid(i,k) = coord[k] ;
        }
        return ;
      }
      i -= m_node_grid.dimension_0();

      // Segment 5: the remainder enumerates (element,node) pairs with the
      // node index varying fastest.
      const size_t ielem = i / ElemNode ;
      const size_t inode = i % ElemNode ;

      unsigned elem_coord[3] ;
      unsigned node_coord[3] ;

      m_box_part.uses_elem_coord( ielem , elem_coord );

      for ( unsigned k = 0 ; k < 3 ; ++k ) {
        node_coord[k] = elem_coord[k] + m_elem_node_local[inode][k] ;
      }

      m_elem_node(ielem,inode) = m_box_part.local_node_id( node_coord );
    }
};
} // namespace Example
} // namespace Kokkos
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_EXAMPLE_BOXELEMFIXTURE_HPP */
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <utility>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <limits>
#include <BoxElemPart.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
/** \brief  Compute the sub-box of 'global_box' assigned to 'global_rank'
 *          when the box is divided among 'global_size' ranks.
 *
 *  \param global_size  Number of ranks sharing the global box.
 *  \param global_rank  Rank whose sub-box is requested.
 *  \param global_box   Three [begin,end] pairs, one per axis.
 *  \param box          Output: three [begin,end] pairs for this rank.
 *
 *  The rank range is narrowed repeatedly: each pass divides the current range
 *  of ranks into 5 groups if its size is a multiple of five, else 3 groups if
 *  a multiple of three, else 2 groups, keeps the group containing
 *  'global_rank', and cuts the currently longest axis of the box in
 *  proportion to that group's position within the range. The loop ends when
 *  the range holds a single rank.
 */
void box_partition( const unsigned global_size ,
                    const unsigned global_rank ,
                    const unsigned global_box[][2] ,
                    unsigned box[][2] )
{
  // Begin with the whole global box.
  for ( unsigned a = 0 ; a < 3 ; ++a ) {
    box[a][0] = global_box[a][0] ;
    box[a][1] = global_box[a][1] ;
  }

  unsigned rank_begin = 0 ;            // first rank of the current range
  unsigned rank_count = global_size ;  // number of ranks in the current range

  while ( 1 < rank_count ) {

    // Number of groups for this pass: prefer 5, then 3, then 2.
    unsigned groups ;
    if      ( 0 == ( rank_count % 5 ) ) { groups = 5 ; }
    else if ( 0 == ( rank_count % 3 ) ) { groups = 3 ; }
    else                                { groups = 2 ; }

    const unsigned chunk = rank_count / groups ;

    // [ sub_begin , sub_end ) is the kept group, relative to rank_begin.
    unsigned sub_begin , sub_end ;

    if ( 2 == groups && ! ( global_rank < rank_begin + chunk ) ) {
      // Two-way split, upper group: it takes every remaining rank, so it is
      // one larger than 'chunk' when rank_count is odd.
      sub_begin = chunk ;
      sub_end   = rank_count ;
    }
    else {
      // Round the relative rank down to a multiple of 'chunk'.
      sub_begin = chunk * size_t( double( global_rank - rank_begin ) / double(chunk) );
      sub_end   = sub_begin + chunk ;
    }

    // Pick the axis with the largest extent; ties go to the lower axis index.
    const unsigned extent[3] = {
      box[0][1] - box[0][0] ,
      box[1][1] - box[1][0] ,
      box[2][1] - box[2][0] };

    unsigned axis = 0 ;
    for ( unsigned a = 1 ; a < 3 ; ++a ) {
      if ( extent[a] > extent[axis] ) { axis = a ; }
    }

    // Cut that axis in proportion to the kept group. Both new bounds are
    // measured from the old lower bound, so the upper bound is written first.
    const unsigned lower = box[ axis ][0] ;

    box[ axis ][1] = lower + unsigned( double(extent[axis]) * ( double(sub_end)   / double(rank_count) ));
    box[ axis ][0] = lower + unsigned( double(extent[axis]) * ( double(sub_begin) / double(rank_count) ));

    // Descend into the kept group.
    rank_count = sub_end - sub_begin ;
    rank_begin = rank_begin + sub_begin ;
  }
}
} /* namespace Example */
} /* namespace Kokkos */
//----------------------------------------------------------------------------