Commit 2cd3a41c authored by Carter Edwards

Kokkos Example: Simple finite-element integration of a vector-valued function on a box.

Mapping of nodal coordinates can be skewed.
Linear or quadratic elements.
Lumping of the element integral to nodes via atomics or a second-pass gather-sum functor.
The integral of the function has an analytic answer for verification.

TODO: Clean up and add extensive comments.
TODO: Build with CMake and add to the CMake tests.

Fix create_mirror_view to allow mirroring views with 'const' data type.
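
A minimal usage sketch of the fixed overload (illustrative only, not part of this commit; assumes a host-resident view whose HostMirror is assignable from the view itself):

  Kokkos::View<double*, Kokkos::Serial> a("a", 10);
  Kokkos::View<const double*, Kokkos::Serial> a_const = a; // const-data alias of 'a'

  // The enable_if now tests Impl::ViewAssignable< HostMirror, View > instead of
  // memory-space equality, so a 'const'-data view can be mirrored; here the
  // mirror is assignable from the view itself and the view is returned as-is.
  Kokkos::View<const double*, Kokkos::Serial>::HostMirror a_host =
    Kokkos::create_mirror_view( a_const );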
parent ac53a3e5
@@ -754,8 +754,7 @@ template< class T , class L , class D , class M , class S >
 typename View<T,L,D,M,S>::HostMirror
 create_mirror_view( const View<T,L,D,M,S> & view ,
                     typename Impl::enable_if<
-                      Impl::is_same< typename ViewTraits<T,L,D,M>::memory_space ,
-                                     HostSpace >::value
+                      Impl::ViewAssignable< typename View<T,L,D,M,S>::HostMirror , View<T,L,D,M,S> >::value
                     >::type * = 0 )
 {
   return view ;
@@ -765,11 +764,10 @@ template< class T , class L , class D , class M , class S >
 typename View<T,L,D,M,S>::HostMirror
 create_mirror_view( const View<T,L,D,M,S> & view ,
                     typename Impl::enable_if<
-                      ! Impl::is_same< typename ViewTraits<T,L,D,M>::memory_space ,
-                                       HostSpace >::value
+                      ! Impl::ViewAssignable< typename View<T,L,D,M,S>::HostMirror , View<T,L,D,M,S> >::value
                     >::type * = 0 )
 {
-  typedef typename View<T,L,D,M>::HostMirror host_view ;
+  typedef typename View<T,L,D,M,S>::HostMirror host_view ;
   host_view tmp ;
   Impl::ViewAssignment< S >( tmp , view );
   return tmp ;
INCLUDE(TribitsAddExecutableAndTest)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
SET(SOURCES "")
FILE(GLOB SOURCES *.cpp)
SET(LIBRARIES kokkosexample_fixture kokkoscore_devicethreads kokkoscore_impl)
IF( TPL_ENABLE_CUDA AND TPL_ENABLE_CUSPARSE )
SET(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF)
# add MPI include dir to nvcc
IF( TPL_ENABLE_MPI )
CUDA_INCLUDE_DIRECTORIES( ${MPI_BASE_DIR}/include )
ENDIF()
TRIBITS_ADD_LIBRARY(
test_fixture_cuda
SOURCES TestFixture.cu
CUDALIBRARY
)
LIST( APPEND LIBRARIES
kokkoscore_devicecuda test_fixture_cuda
)
ENDIF()
TRIBITS_ADD_EXECUTABLE(
feint
SOURCES ${SOURCES}
COMM serial mpi
DEPLIBS ${LIBRARIES}
)
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXAMPLE_FEINT_FUNCTORS_HPP
#define KOKKOS_EXAMPLE_FEINT_FUNCTORS_HPP
#include <stdio.h>
#include <Kokkos_Serial.hpp>
#include <Kokkos_Atomic.hpp>
#include <BoxElemFixture.hpp>
namespace Kokkos {
namespace Example {
template< class FixtureType ,
class FunctionType ,
bool PerformScatterAddWithAtomic >
struct FiniteElementIntegration ;
template< class Device , BoxElemPart::ElemOrder ElemOrder , class GridMap ,
class FunctionType ,
bool PerformScatterAddWithAtomic >
struct FiniteElementIntegration<
Kokkos::Example::BoxElemFixture< Device , ElemOrder , GridMap > ,
FunctionType ,
PerformScatterAddWithAtomic >
{
typedef Kokkos::Example::BoxElemFixture< Device , ElemOrder , GridMap > BoxFixtureType ;
typedef Kokkos::Example::HexElement_Data< BoxFixtureType::ElemNode > HexElemDataType ;
enum { ElemNodeCount = HexElemDataType::element_node_count };
enum { IntegrationCount = HexElemDataType::integration_count };
enum { ValueCount = FunctionType::value_count };
typedef View<int*, Device> ElemErrorType ;
typedef View<double*[ElemNodeCount][ValueCount],Device> ElemValueType ;
typedef View<double*[ValueCount], Device> NodeValueType ;
const HexElemDataType m_hex_elem_data ;
const BoxFixtureType m_box_fixture ;
const FunctionType m_function ;
const ElemErrorType m_elem_error ;
const ElemValueType m_elem_integral ;
const NodeValueType m_node_lumped ;
//----------------------------------------
FiniteElementIntegration(
const BoxFixtureType & box_fixture ,
const FunctionType & function )
: m_hex_elem_data()
, m_box_fixture( box_fixture )
, m_function( function )
, m_elem_error( "elem_error" , box_fixture.elem_count() )
, m_elem_integral( "elem_integral" , box_fixture.elem_count() )
, m_node_lumped( "node_lumped" , box_fixture.node_count() )
{}
//----------------------------------------
// Device for parallel dispatch.
typedef Device device_type ;
// Value type for parallel reduction.
struct value_type { double value[ ValueCount ]; int error ; };
//----------------------------------------
// Transform element interpolation function gradients and
// compute determinant of spatial jacobian.
KOKKOS_INLINE_FUNCTION
float transform_gradients(
const float grad[][ ElemNodeCount ] , // Gradients of the basis functions on the master element
const double coord[][ ElemNodeCount ] ,
float dpsi[][ ElemNodeCount ] ) const
{
enum { TensorDim = 9 };
enum { j11 = 0 , j12 = 1 , j13 = 2 ,
j21 = 3 , j22 = 4 , j23 = 5 ,
j31 = 6 , j32 = 7 , j33 = 8 };
// Temporary for jacobian accumulation in double.
double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
for( int i = 0; i < ElemNodeCount ; ++i ) {
J[j11] += grad[0][i] * coord[0][i] ;
J[j12] += grad[0][i] * coord[1][i] ;
J[j13] += grad[0][i] * coord[2][i] ;
J[j21] += grad[1][i] * coord[0][i] ;
J[j22] += grad[1][i] * coord[1][i] ;
J[j23] += grad[1][i] * coord[2][i] ;
J[j31] += grad[2][i] * coord[0][i] ;
J[j32] += grad[2][i] * coord[1][i] ;
J[j33] += grad[2][i] * coord[2][i] ;
}
// Inverse Jacobian: compute the cofactors in double, store as float, then scale by 1/detJ below.
float invJ[ TensorDim ] = {
float( J[j22] * J[j33] - J[j23] * J[j32] ) ,
float( J[j13] * J[j32] - J[j12] * J[j33] ) ,
float( J[j12] * J[j23] - J[j13] * J[j22] ) ,
float( J[j23] * J[j31] - J[j21] * J[j33] ) ,
float( J[j11] * J[j33] - J[j13] * J[j31] ) ,
float( J[j13] * J[j21] - J[j11] * J[j23] ) ,
float( J[j21] * J[j32] - J[j22] * J[j31] ) ,
float( J[j12] * J[j31] - J[j11] * J[j32] ) ,
float( J[j11] * J[j22] - J[j12] * J[j21] ) };
const float detJ = J[j11] * invJ[j11] +
J[j21] * invJ[j12] +
J[j31] * invJ[j13] ;
{
const float detJinv = 1.0 / detJ ;
for ( int i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; }
}
// Transform gradients:
for ( int i = 0; i < ElemNodeCount ; ++i ) {
dpsi[0][i] = grad[0][i] * invJ[j11] + grad[1][i] * invJ[j12] + grad[2][i] * invJ[j13];
dpsi[1][i] = grad[0][i] * invJ[j21] + grad[1][i] * invJ[j22] + grad[2][i] * invJ[j23];
dpsi[2][i] = grad[0][i] * invJ[j31] + grad[1][i] * invJ[j32] + grad[2][i] * invJ[j33];
}
return detJ ;
}
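// Illustrative sanity check (not part of the original source): if the element
// is a uniform scaling of the master element by a factor s, then J = s * I,
// detJ = s^3, and each transformed gradient is the master-element gradient
// scaled by 1/s, i.e. dpsi[k][i] = grad[k][i] / s.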
KOKKOS_INLINE_FUNCTION
void operator()( const int ielem , value_type & update ) const
{
// Local temporaries for gathering nodal data.
double node_coord[3][ ElemNodeCount ];
int inode[ ElemNodeCount ] ;
// Gather indices of the element's nodes from global memory to a local temporary.
for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
inode[i] = m_box_fixture.elem_node( ielem , i );
}
// Gather coordinates of the element's nodes from global memory to a local temporary.
for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
node_coord[0][i] = m_box_fixture.node_coord( inode[i] , 0 );
node_coord[1][i] = m_box_fixture.node_coord( inode[i] , 1 );
node_coord[2][i] = m_box_fixture.node_coord( inode[i] , 2 );
}
int elem_error = 0 ;
m_elem_error(ielem) = 0 ;
// Local temporary to accumulate the numerical cubature of the vector-valued function.
double accum[ ValueCount ];
for ( int j = 0 ; j < ValueCount ; ++j ) { accum[j] = 0 ; }
// Cubature loop for this element:
for ( int k = 0 ; k < IntegrationCount ; ++k ) {
// Integration point in space as interpolated from nodal coordinates:
double point[3] = { 0 , 0 , 0 };
for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
point[0] += node_coord[0][i] * m_hex_elem_data.values[k][i] ;
point[1] += node_coord[1][i] * m_hex_elem_data.values[k][i] ;
point[2] += node_coord[2][i] * m_hex_elem_data.values[k][i] ;
}
// Example function vector value at cubature point:
double val_at_pt[ ValueCount ];
m_function( point , val_at_pt );
// Temporary array for transformed element basis functions' gradient.
// Not used in this example, but computed anyway by the more general
// deformation function.
float dpsi[3][ ElemNodeCount ];
// Compute deformation jacobian, transform basis function gradient,
// and return determinant of deformation jacobian.
const float detJ = transform_gradients( m_hex_elem_data.gradients[k] , node_coord , dpsi );
if ( detJ <= 0 ) { elem_error = 1 ; }
// Integration weight.
const float w = m_hex_elem_data.weights[k] * detJ ;
// Cubature of function.
for ( int j = 0 ; j < ValueCount ; ++j ) { accum[j] += val_at_pt[j] * w ; }
}
if ( elem_error ) {
m_elem_error(ielem) = 1 ;
update.error = 1 ;
}
else {
// Element contribution to global integral:
for ( int j = 0 ; j < ValueCount ; ++j ) {
update.value[j] += accum[j] ;
}
// Element-node quantity for lumping to nodes:
for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
for ( int j = 0 ; j < ValueCount ; ++j ) {
// Save the element's per-node apportionment of the integral to global memory.
m_elem_integral( ielem , i , j ) = accum[j] / ElemNodeCount ;
}
}
if ( PerformScatterAddWithAtomic ) {
// Option to immediately scatter-add the integrated quantities to nodes.
// This is a race condition as two or more threads could attempt
// concurrent update of nodal values. The atomic_fetch_add (+=)
// function guarantees that the summation will occur correctly;
// however, there can be no guarantee for the order of summation.
// Due to non-associativity of floating point arithmetic the result
// is non-deterministic within bounds of floating point round-off.
for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
for ( int j = 0 ; j < ValueCount ; ++j ) {
Kokkos::atomic_fetch_add( & m_node_lumped( inode[i] , j ) , m_elem_integral( ielem , i , j ) );
}
}
}
}
}
//--------------------------------------------------------------------------
KOKKOS_INLINE_FUNCTION
void init( value_type & update ) const
{
for ( int j = 0 ; j < ValueCount ; ++j ) update.value[j] = 0 ;
update.error = 0 ;
}
KOKKOS_INLINE_FUNCTION
void join( volatile value_type & update ,
volatile const value_type & input ) const
{
for ( int j = 0 ; j < ValueCount ; ++j ) update.value[j] += input.value[j] ;
if ( input.error ) update.error = 1 ;
}
};
} /* namespace Example */
} /* namespace Kokkos */
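// Dispatch sketch for the element functor (illustrative only; 'fixture',
// 'MyFunction', and the device choice are assumptions, not part of this file):
//
//   typedef Kokkos::Example::BoxElemFixture< Device , BoxElemPart::ElemLinear > FixtureType ;
//   typedef Kokkos::Example::FiniteElementIntegration< FixtureType , MyFunction , true > IntegrateType ;
//
//   IntegrateType integrate( fixture , MyFunction() );
//   IntegrateType::value_type result ;
//   Kokkos::parallel_reduce( fixture.elem_count() , integrate , result );
//
//   // result.value[*] holds the global integral of the function and
//   // result.error is nonzero if any element had a non-positive detJ.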
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
template< class ViewElemNode ,
class ViewNodeScan ,
class ViewNodeElem >
void map_node_to_elem( const ViewElemNode & elem_node ,
const ViewNodeScan & node_scan ,
const ViewNodeElem & node_elem )
{
const typename ViewElemNode::HostMirror host_elem_node = Kokkos::create_mirror_view( elem_node );
const typename ViewNodeScan::HostMirror host_node_scan = Kokkos::create_mirror_view( node_scan );
const typename ViewNodeElem::HostMirror host_node_elem = Kokkos::create_mirror_view( node_elem );
const int elem_count = host_elem_node.dimension_0();
const int elem_node_count = host_elem_node.dimension_1();
const int node_count = host_node_scan.dimension_0() - 1 ;
const View<int*,Kokkos::Serial> node_elem_count( "node_elem_count" , node_count );
Kokkos::deep_copy( host_elem_node , elem_node );
for ( int i = 0 ; i < elem_count ; ++i ) {
for ( int j = 0 ; j < elem_node_count ; ++j ) {
++node_elem_count( host_elem_node(i,j) );
}
}
for ( int i = 0 ; i < node_count ; ++i ) {
host_node_scan(i+1) += host_node_scan(i) + node_elem_count(i);
node_elem_count(i) = 0 ;
}
for ( int i = 0 ; i < elem_count ; ++i ) {
for ( int j = 0 ; j < elem_node_count ; ++j ) {
const int inode = host_elem_node(i,j);
const int offset = host_node_scan(inode) + node_elem_count(inode);
host_node_elem( offset , 0 ) = i ;
host_node_elem( offset , 1 ) = j ;
++node_elem_count(inode);
}
}
Kokkos::deep_copy( node_scan , host_node_scan );
Kokkos::deep_copy( node_elem , host_node_elem );
}
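// Worked example (illustrative only): two 2-node "elements" that share node 1,
//   elem_node = { { 0 , 1 } , { 1 , 2 } }
// produce the compressed node-to-element map
//   node_scan = { 0 , 1 , 3 , 4 }
//   node_elem = { {0,0} , {0,1} , {1,0} , {1,1} }   // (element, local node)
// so the entries for node 1 occupy rows [ node_scan(1) , node_scan(2) ) = [ 1 , 3 ).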
template< class ViewNodeValue ,
class ViewElemValue ,
bool AlreadyUsedAtomic >
struct LumpElemToNode {
typedef typename ViewElemValue::device_type device_type ;
typedef typename ViewElemValue::shape_type elem_value_shape_type ;
enum { value_count = elem_value_shape_type::N2 };
ViewNodeValue m_node_value ;
ViewElemValue m_elem_value ;
View<int*, device_type> m_node_scan ;
View<int*[2],device_type> m_node_elem ;
template< class ViewElemNode >
LumpElemToNode( const ViewNodeValue & node_value ,
const ViewElemValue & elem_value ,
const ViewElemNode & elem_node )
: m_node_value( node_value )
, m_elem_value( elem_value )
, m_node_scan( "node_scan" , AlreadyUsedAtomic ? 0 : node_value.dimension_0() + 1 )
, m_node_elem( "node_elem" , AlreadyUsedAtomic ? 0 : elem_node.dimension_0() * elem_node.dimension_1() )
{
if ( ! AlreadyUsedAtomic ) {
map_node_to_elem( elem_node , m_node_scan , m_node_elem );
}
}
//----------------------------------------
struct value_type { double value[ value_count ]; };
KOKKOS_INLINE_FUNCTION
void operator()( const int inode , value_type & update ) const
{
if ( ! AlreadyUsedAtomic ) {
int i = m_node_scan(inode);
const int end = m_node_scan(inode+1);
value_type local ;
for ( int j = 0 ; j < value_count ; ++j ) { local.value[j] = 0 ; }
for ( ; i < end ; ++i ) {
const int ielem = m_node_elem(i,0);
const int ielem_node = m_node_elem(i,1);
for ( int j = 0 ; j < value_count ; ++j ) {
local.value[j] += m_elem_value( ielem , ielem_node , j );
}
}
for ( int j = 0 ; j < value_count ; ++j ) {
m_node_value( inode , j ) += local.value[j] ;
update.value[j] += local.value[j] ;
}
}
else {
for ( int j = 0 ; j < value_count ; ++j ) {
update.value[j] += m_node_value( inode , j );
}
}
}
KOKKOS_INLINE_FUNCTION
void init( value_type & update ) const
{ for ( int j = 0 ; j < value_count ; ++j ) { update.value[j] = 0 ; } }
KOKKOS_INLINE_FUNCTION
void join( volatile value_type & update ,
volatile const value_type & input ) const
{ for ( int j = 0 ; j < value_count ; ++j ) { update.value[j] += input.value[j] ; } }
//----------------------------------------
};
} /* namespace Example */
} /* namespace Kokkos */
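//----------------------------------------------------------------------------
// Second-pass dispatch sketch (illustrative only; the view and count names are
// assumptions): when the element functor ran with
// PerformScatterAddWithAtomic == false, the element apportionments are
// gathered to the nodes and re-summed for verification:
//
//   typedef Kokkos::Example::LumpElemToNode< NodeValueView , ElemValueView , false > LumpType ;
//   LumpType lump( node_lumped , elem_integral , elem_node_view );
//   LumpType::value_type lump_result ;
//   Kokkos::parallel_reduce( node_count , lump , lump_result );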
#endif /* #ifndef KOKKOS_EXAMPLE_FEINT_FUNCTORS_HPP */
#!/bin/bash
#-----------------------------------------------------------------------------
# Simple build script with options
#-----------------------------------------------------------------------------
# Directory for Kokkos
KOKKOS="../../core"
source ${KOKKOS}/src/build_common.sh
# Process command line options and set compilation variables
#
# INC_PATH : required include paths
# CXX : C++ compiler with compiler-specific options
# CXX_SOURCES : C++ files for host
# NVCC : Cuda compiler with compiler-specific options
# NVCC_SOURCES : Cuda sources
#-----------------------------------------------------------------------------
INC_PATH="${INC_PATH} -I../fixture"
CXX_SOURCES="${CXX_SOURCES} ../fixture/BoxElemPart.cpp"
CXX_SOURCES="${CXX_SOURCES} *.cpp"
#-----------------------------------------------------------------------------
# If a Cuda compiler is set, build the Cuda source code
if [ -n "${NVCC}" ] ;
then
NVCC_SOURCES="${NVCC_SOURCES} *.cu"
CXX_SOURCES="${CXX_SOURCES}"
echo ${NVCC} ${INC_PATH} ${NVCC_SOURCES}
${NVCC} ${INC_PATH} ${NVCC_SOURCES}
fi
#-----------------------------------------------------------------------------
# Build C++ source code:
EXEC="feint.exe"
echo ${CXX} ${INC_PATH} -o ${EXEC} ${CXX_SOURCES} ${LIB}
rm -f ${EXEC}
${CXX} ${INC_PATH} -o ${EXEC} ${CXX_SOURCES} ${LIB}
rm -f *.o *.a
#-----------------------------------------------------------------------------
/*
//@HEADER
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXAMPLE_FEINT_HPP
#define KOKKOS_EXAMPLE_FEINT_HPP
#include <iostream>
#include <BoxElemFixture.hpp>