James Willenbring / Trilinos · Commits

Commit 5e0a04bc, authored Sep 05, 2013 by Carter Edwards

Kokkos Examples: Add commentary in preparation for tutorial walk-through.

Parent: eb71601c
Changes: 4 files
packages/kokkos/example/feint/ElemFunctor.hpp
@@ -52,11 +52,15 @@
 namespace Kokkos {
 namespace Example {

+/** \brief  Numerically integrate a function on a finite element mesh and
+ *          project the integrated values to nodes.
+ */
 template< class FixtureType ,
           class FunctionType ,
           bool  PerformScatterAddWithAtomic >
 struct FiniteElementIntegration ;

+// Specialized for an 'Example::BoxElemFixture' finite element mesh
 template< class Device ,
           BoxElemPart::ElemOrder ElemOrder ,
           class GridMap ,
           class FunctionType ,
           bool  PerformScatterAddWithAtomic >
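A walk-through aside before the details: this file implements a two-phase pattern — integrate a function element by element, then apportion each element's integral equally to its nodes. Below is a minimal serial sketch of that pattern in plain C++, with a hypothetical one-dimensional mesh standing in for the box fixture; the functors in the diff are the device-parallel version of roughly these two loops.

    #include <vector>
    #include <cstdio>

    int main()
    {
      // Toy 1D mesh: 4 two-node line elements, 5 nodes, connectivity (e, e+1).
      const int elem_count = 4 , nodes_per_elem = 2 , node_count = 5 ;

      // Phase 1: "integrate" per element (pretend each integral is 1.0).
      std::vector<double> elem_integral( elem_count , 1.0 );

      // Phase 2: lump each element's integral equally to its nodes.
      std::vector<double> node_lumped( node_count , 0.0 );
      for ( int e = 0 ; e < elem_count ; ++e ) {
        for ( int i = 0 ; i < nodes_per_elem ; ++i ) {
          node_lumped[ e + i ] += elem_integral[ e ] / nodes_per_elem ;
        }
      }

      for ( int n = 0 ; n < node_count ; ++n )
        std::printf( "node %d : %.2f\n" , n , node_lumped[n] );  // 0.5 1 1 1 0.5
      return 0 ;
    }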
@@ -65,23 +69,29 @@ struct FiniteElementIntegration<
   FunctionType ,
   PerformScatterAddWithAtomic >
 {
-  typedef Kokkos::Example::BoxElemFixture< Device , ElemOrder >        BoxFixtureType ;
-  typedef Kokkos::Example::HexElement_Data< BoxFixtureType::ElemNode > HexElemDataType ;
+  // Element mesh types:
+  typedef Kokkos::Example::BoxElemFixture< Device , ElemOrder >        BoxFixtureType ;
+  typedef Kokkos::Example::HexElement_Data< BoxFixtureType::ElemNode > HexElemDataType ;

   enum { ElemNodeCount    = HexElemDataType::element_node_count };
   enum { IntegrationCount = HexElemDataType::integration_count };
   enum { ValueCount       = FunctionType::value_count };

+  // Dictionary of view types:
   typedef View< int* , Device >                                    ElemErrorType ;
   typedef View< double*[ ElemNodeCount ][ ValueCount ] , Device >  ElemValueType ;
   typedef View< double*[ ValueCount ] , Device >                   NodeValueType ;

-  const HexElemDataType m_hex_elem_data ;
-  const BoxFixtureType  m_box_fixture ;
-  const FunctionType    m_function ;
-  const ElemErrorType   m_elem_error ;
-  const ElemValueType   m_elem_integral ;
-  const NodeValueType   m_node_lumped ;
+  // Data members for this Functor:
+  const HexElemDataType m_hex_elem_data ;  ///< Master element
+  const BoxFixtureType  m_box_fixture ;    ///< Unstructured mesh data
+  const FunctionType    m_function ;       ///< Function to integrate
+  const ElemErrorType   m_elem_error ;     ///< Flags for element errors
+  const ElemValueType   m_elem_integral ;  ///< Per-element quantities
+  const NodeValueType   m_node_lumped ;    ///< Quantities lumped to nodes

   //----------------------------------------
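Walk-through note on the view typedefs above: in the 2013-era Kokkos View syntax, the leading '*' extent is chosen at run time (one entry per element) while the bracketed extents are compile-time constants, so ElemValueType holds elem_count x ElemNodeCount x ValueCount doubles, indexed as m_elem_integral(ielem,i,j). A plain-C++ analogue (hypothetical helper, not Kokkos) of that layout:

    #include <vector>

    // Analogue of View< double*[ElemNodeCount][ValueCount] , Device > :
    // leading extent is run-time, the remaining extents are compile-time.
    template< int ElemNodeCount , int ValueCount >
    struct ElemValueAnalogue {
      std::vector<double> data ;
      explicit ElemValueAnalogue( int elem_count )
        : data( elem_count * ElemNodeCount * ValueCount , 0.0 ) {}
      double & operator()( int ielem , int i , int j )
        { return data[ ( ielem * ElemNodeCount + i ) * ValueCount + j ]; }
    };

    int main()
    {
      ElemValueAnalogue<8,8> elem_integral( 100 );  // 100 elements x 8 nodes x 8 values
      elem_integral( 42 , 3 , 5 ) = 1.0 ;           // indexed like m_elem_integral(ielem,i,j)
      return 0 ;
    }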
@@ -89,7 +99,7 @@ struct FiniteElementIntegration<
   const BoxFixtureType & box_fixture ,
   const FunctionType   & function )
   : m_hex_elem_data()
-  , m_box_fixture( box_fixture )
+  , m_box_fixture( box_fixture ) // Shallow copy of the mesh fixture
   , m_function( function )
   , m_elem_error(    "elem_error" ,    box_fixture.elem_count() )
   , m_elem_integral( "elem_integral" , box_fixture.elem_count() )
@@ -100,8 +110,11 @@ struct FiniteElementIntegration<
   // Device for parallel dispatch.
   typedef Device device_type ;

-  // Value type for parallel reduction.
+  // Value type for global parallel reduction.
   struct value_type {
-    double value[ ValueCount ];
-    int    error ;
+    double value[ ValueCount ]; ///< Integrated quantities
+    int    error ;              ///< Element inversion flag
   };

   //----------------------------------------
   // Transform element interpolation function gradients and
@@ -117,7 +130,7 @@ struct FiniteElementIntegration<
       j21 = 3 , j22 = 4 , j23 = 5 ,
       j31 = 6 , j32 = 7 , j33 = 8 };

-    // Temporary for jacobian accumulation in double.
+    // Temporary for jacobian accumulation is double for summation accuracy.
     double J[ TensorDim ] = { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 };

     for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
@@ -134,7 +147,7 @@ struct FiniteElementIntegration<
       J[j33] += grad[2][i] * coord[2][i] ;
     }

-    // Inverse jacobian, compute double and store float.
+    // Inverse jacobian, compute as double and store as float.
     float invJ[ TensorDim ] = {
       float( J[j22] * J[j33] - J[j23] * J[j32] ) ,
       float( J[j13] * J[j32] - J[j12] * J[j33] ) ,
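The two comments amended above make a genuine numerical point: long sums of float terms lose accuracy once the running sum dwarfs each addend, so the jacobian is accumulated in double and only the final inverse is stored as float. A standalone demonstration of why (plain C++, no Kokkos required; exact printed values are hardware- and rounding-mode-dependent):

    #include <cstdio>

    int main()
    {
      // Add 0.1f ten million times.  Once the running float sum is large,
      // each 0.1 addend falls near the rounding granularity and the sum drifts.
      float  sum_f = 0 ;
      double sum_d = 0 ;
      for ( int i = 0 ; i < 10000000 ; ++i ) { sum_f += 0.1f ; sum_d += 0.1f ; }
      std::printf( "float  accumulation: %.1f\n" , sum_f );        // off by several percent
      std::printf( "double accumulation: %.1f\n" , sum_d );        // ~1000000.0
      std::printf( "stored as float:     %.1f\n" , (float) sum_d ); // one final rounding
      return 0 ;
    }

Accumulating in double and casting once at the end costs only a single rounding, which is the design choice the comment records.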
@@ -159,15 +172,23 @@ struct FiniteElementIntegration<
     // Transform gradients:
     for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
       dpsi[0][i] = grad[0][i] * invJ[j11] + grad[1][i] * invJ[j12] + grad[2][i] * invJ[j13];
       dpsi[1][i] = grad[0][i] * invJ[j21] + grad[1][i] * invJ[j22] + grad[2][i] * invJ[j23];
       dpsi[2][i] = grad[0][i] * invJ[j31] + grad[1][i] * invJ[j32] + grad[2][i] * invJ[j33];
     }

     return detJ ;
   }

+  // Functor's function called for each element in the mesh
+  // to numerically integrate the function and add element quantities
+  // to the global integral.
   KOKKOS_INLINE_FUNCTION
   void operator()( const int ielem , value_type & update ) const
   {
@@ -176,28 +197,27 @@ struct FiniteElementIntegration<
     int inode[ ElemNodeCount ];

-    // Gather indices of element's node from global variable to local temporary.
+    // Gather indices of element's node from global memory to local memory.
     for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
       inode[i] = m_box_fixture.elem_node( ielem , i );
     }

-    // Gather coordinates of element's nodes from global variable to local temporary.
+    // Gather coordinates of element's nodes from global memory to local memory.
     for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
       node_coord[0][i] = m_box_fixture.node_coord( inode[i] , 0 );
       node_coord[1][i] = m_box_fixture.node_coord( inode[i] , 1 );
       node_coord[2][i] = m_box_fixture.node_coord( inode[i] , 2 );
     }

-    int elem_error = 0 ;
-    m_elem_error( ielem ) = 0 ;
-
-    // Local temporary to accumulate numerical cubature of vector valued function.
+    // Local temporary to accumulate numerical integration
+    // of vector valued function.
     double accum[ ValueCount ];

     for ( int j = 0 ; j < ValueCount ; ++j ) { accum[j] = 0 ; }

-    // Cubature loop for this element:
+    int error = 0 ;
+
+    // Numerical integration loop for this element:
     for ( int k = 0 ; k < IntegrationCount ; ++k ) {

       // Integration point in space as interpolated from nodal coordinates:
@@ -219,55 +239,58 @@ struct FiniteElementIntegration<
       // Compute deformation jacobian, transform basis function gradient,
       // and return determinant of deformation jacobian.
-      const float detJ = transform_gradients( m_hex_elem_data.gradients[k] ,
-                                              node_coord , dpsi );
+      float detJ = transform_gradients( m_hex_elem_data.gradients[k] ,
+                                        node_coord , dpsi );

-      if ( detJ <= 0 ) { elem_error = 1 ; }
+      // Check for inverted spatial jacobian
+      if ( detJ <= 0 ) { error = 1 ; detJ = 0 ; }

       // Integration weight.
       const float w = m_hex_elem_data.weights[k] * detJ ;

-      // Cubature of function.
       for ( int j = 0 ; j < ValueCount ; ++j ) { accum[j] += val_at_pt[j] * w ; }
     }

-    if ( elem_error ) {
-      m_elem_error( ielem ) = 1 ;
-      update.error = 1 ;
-    }
-    else {
-      // Element contribution to global integral:
-      for ( int j = 0 ; j < ValueCount ; ++j ) { update.value[j] += accum[j] ; }
-
-      // Element-node quantity for lumping to nodes:
-      for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
-        for ( int j = 0 ; j < ValueCount ; ++j ) {
-          // Save element's integral apportionment to nodes to global memory
-          m_elem_integral( ielem , i , j ) = accum[j] / ElemNodeCount ;
-        }
-      }
-
-      if ( PerformScatterAddWithAtomic ) {
-        // Option to immediately scatter-add the integrated quantities to nodes.
-        // This is a race condition as two or more threads could attempt
-        // concurrent update of nodal values. The atomic_fetch_add (+=)
-        // function guarantees that the summation will occur correctly;
-        // however, there can be no guarantee for the order of summation.
-        // Due to non-associativity of floating point arithmetic the result
-        // is non-deterministic within bounds of floating point round-off.
-        for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
-          for ( int j = 0 ; j < ValueCount ; ++j ) {
-            Kokkos::atomic_fetch_add( & m_node_lumped( inode[i] , j ) ,
-                                        m_elem_integral( ielem , i , j ) );
-          }
-        }
-      }
-    }
-  }
+    m_elem_error( ielem ) = error ;
+
+    // Element contribution to global integral:
+    if ( error ) { update.error = 1 ; }
+
+    for ( int j = 0 ; j < ValueCount ; ++j ) { update.value[j] += accum[j] ; }
+
+    // Element-node quantity for lumping to nodes:
+    for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
+      for ( int j = 0 ; j < ValueCount ; ++j ) {
+        // Save element's integral apportionment to nodes to global memory
+        m_elem_integral( ielem , i , j ) = accum[j] / ElemNodeCount ;
+      }
+    }
+
+    if ( PerformScatterAddWithAtomic ) {
+      // Option to immediately scatter-add the integrated quantities to nodes.
+      // This is a race condition as two or more threads could attempt
+      // concurrent update of nodal values. The atomic_fetch_add (+=)
+      // function guarantees that the summation will occur correctly;
+      // however, there can be no guarantee for the order of summation.
+      // Due to non-associativity of floating point arithmetic the result
+      // is non-deterministic within bounds of floating point round-off.
+      for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
+        for ( int j = 0 ; j < ValueCount ; ++j ) {
+          Kokkos::atomic_fetch_add( & m_node_lumped( inode[i] , j ) ,
+                                      m_elem_integral( ielem , i , j ) );
+        }
+      }
+    }
+  }
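The race-condition comment added above is the crux of the UseAtomic variant: atomic adds make each individual update correct, but the order of updates is scheduling-dependent, and floating point addition is not associative. A standalone illustration that summation order alone changes a rounded result, even with just three terms (plain C++):

    #include <cstdio>

    int main()
    {
      // Floating point addition is not associative: the same three terms,
      // summed in two orders, round to different results.
      const float a = 1.0e8f , b = -1.0e8f , c = 1.0f ;
      std::printf( "(a + b) + c = %g\n" , ( a + b ) + c );  // 1 : a+b is exactly 0
      std::printf( "a + (b + c) = %g\n" , a + ( b + c ) );  // 0 : c vanishes into b
      return 0 ;
    }

With atomics the per-node sums are this kind of reordered sum, so results vary run to run within round-off, exactly as the comment states.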
   //--------------------------------------------------------------------------
   // Initialization of the global reduction value.

   KOKKOS_INLINE_FUNCTION
   void init( value_type & update ) const
   {
@@ -275,6 +298,7 @@ struct FiniteElementIntegration<
     update.error = 0 ;
   }

+  // Join two contributions to global reduction value.
   KOKKOS_INLINE_FUNCTION
   void join( volatile value_type & update ,
              volatile const value_type & input ) const
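init() and join() complete the reduction interface that Kokkos::parallel_reduce expects: each thread gets a private value_type initialized by init(), accumulates into it through operator(), and the partial results are merged by join(). A serial emulation of that contract (plain C++; all names and the toy workload are hypothetical):

    #include <cstdio>

    // Serial emulation of the reduction contract: init() a private value,
    // accumulate with operator(), merge partial results with join().
    struct Reducer {
      struct value_type { double value ; int error ; };

      void init( value_type & update ) const
      { update.value = 0 ; update.error = 0 ; }

      void operator()( const int i , value_type & update ) const
      { update.value += 0.5 * i ; }   // stand-in for per-element integration

      void join( value_type & update , const value_type & input ) const
      { update.value += input.value ; if ( input.error ) { update.error = 1 ; } }
    };

    int main()
    {
      Reducer f ;
      Reducer::value_type t0 , t1 ;          // one private value per "thread"
      f.init( t0 ); f.init( t1 );
      for ( int i = 0 ; i < 4 ; ++i ) { f( i , t0 ); }   // thread 0's range
      for ( int i = 4 ; i < 8 ; ++i ) { f( i , t1 ); }   // thread 1's range
      f.join( t0 , t1 );                                  // merge partials
      std::printf( "value = %g , error = %d\n" , t0.value , t0.error );
      return 0 ;
    }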
@@ -298,70 +322,42 @@ template< class ViewElemNode ,
           class ViewNodeScan ,
           class ViewNodeElem >
 void map_node_to_elem( const ViewElemNode & elem_node ,
                        const ViewNodeScan & node_scan ,
-                       const ViewNodeElem & node_elem )
-{
-  const typename ViewElemNode::HostMirror host_elem_node =
-    Kokkos::create_mirror_view( elem_node );
-  const typename ViewNodeScan::HostMirror host_node_scan =
-    Kokkos::create_mirror_view( node_scan );
-  const typename ViewNodeElem::HostMirror host_node_elem =
-    Kokkos::create_mirror_view( node_elem );
-
-  const int elem_count      = host_elem_node.dimension_0();
-  const int elem_node_count = host_elem_node.dimension_1();
-  const int node_count      = host_node_scan.dimension_0() - 1 ;
-
-  const View< int* , Kokkos::Serial > node_elem_count( "node_elem_count" , node_count );
-
-  Kokkos::deep_copy( host_elem_node , elem_node );
-
-  for ( int i = 0 ; i < elem_count ; ++i ) {
-    for ( int j = 0 ; j < elem_node_count ; ++j ) {
-      ++node_elem_count( host_elem_node(i,j) );
-    }
-  }
-
-  for ( int i = 0 ; i < node_count ; ++i ) {
-    host_node_scan(i+1) += host_node_scan(i) + node_elem_count(i);
-    node_elem_count(i) = 0 ;
-  }
-
-  for ( int i = 0 ; i < elem_count ; ++i ) {
-    for ( int j = 0 ; j < elem_node_count ; ++j ) {
-      const int inode  = host_elem_node(i,j);
-      const int offset = host_node_scan(inode) + node_elem_count(inode);
-
-      host_node_elem( offset , 0 ) = i ;
-      host_node_elem( offset , 1 ) = j ;
-
-      ++node_elem_count(inode);
-    }
-  }
-
-  Kokkos::deep_copy( node_scan , host_node_scan );
-  Kokkos::deep_copy( node_elem , host_node_elem );
-}
+                       const ViewNodeElem & node_elem );

+/** \brief  Functor to gather-sum elements' per-node quantities
+ *          to element nodes.  Gather-sum is thread safe and
+ *          does not require atomic updates.
+ */
 template< class ViewNodeValue ,
           class ViewElemValue ,
           bool  AlreadyUsedAtomic >
 struct LumpElemToNode {

   typedef typename ViewElemValue::device_type device_type ;

-  typedef typename ViewElemValue::shape_type elem_value_shape_type ;
-
-  enum { value_count = elem_value_shape_type::N2 };
-
-  ViewNodeValue m_node_value ;
-  ViewElemValue m_elem_value ;
-  View< int* ,    device_type > m_node_scan ;
-  View< int*[2] , device_type > m_node_elem ;
+  // In this example we know that the ViewElemValue
+  // array specification is < double*[nNode][nValue] >
+  enum { value_count = ViewElemValue::shape_type::N2 };
+
+  ViewNodeValue m_node_value ;                 ///< Integrated values at nodes
+  ViewElemValue m_elem_value ;                 ///< Values apportioned to nodes
+  View< int* ,    device_type > m_node_scan ;  ///< Offsets for nodes->element
+  View< int*[2] , device_type > m_node_elem ;  ///< Node->element connectivity

+  // Only allocate node->element connectivity if atomic updates
+  // have not already been used for the nodes.
   template< class ViewElemNode >
   LumpElemToNode( const ViewNodeValue & node_value ,
                   const ViewElemValue & elem_value ,
                   const ViewElemNode  & elem_node )
     : m_node_value( node_value )
     , m_elem_value( elem_value )
     , m_node_scan( "node_scan" ,
                    AlreadyUsedAtomic ? 0 : node_value.dimension_0() + 1 )
     , m_node_elem( "node_elem" ,
                    AlreadyUsedAtomic ? 0 : elem_node.dimension_0() *
                                            elem_node.dimension_1() )
   {
     if ( ! AlreadyUsedAtomic ) {
       map_node_to_elem( elem_node , m_node_scan , m_node_elem );
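A small trick worth noticing in the constructor above: when AlreadyUsedAtomic is true the node-to-element map is never consulted, so m_node_scan and m_node_elem are allocated with zero length rather than being left unconstructed. A sketch of the idiom with a plain vector (hypothetical names):

    #include <vector>
    #include <cstdio>

    // When a compile-time flag says the node->element map is unnecessary,
    // size the would-be map to zero instead of leaving it unallocated.
    template< bool AlreadyUsedAtomic >
    struct LumpLike {                      // hypothetical stand-in
      std::vector<int> node_scan ;
      explicit LumpLike( int node_count )
        : node_scan( AlreadyUsedAtomic ? 0 : node_count + 1 ) {}
    };

    int main()
    {
      LumpLike<true>  a( 1000 );
      LumpLike<false> b( 1000 );
      std::printf( "atomic: %zu entries , gather: %zu entries\n" ,
                   a.node_scan.size() , b.node_scan.size() );   // 0 and 1001
      return 0 ;
    }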
@@ -376,26 +372,36 @@ struct LumpElemToNode {
   void operator()( const int inode , value_type & update ) const
   {
     if ( ! AlreadyUsedAtomic ) {
-      int i = m_node_scan( inode );
-      const int end = m_node_scan( inode + 1 );
-
       // Sum element quantities to a local variable.
       value_type local ;
       for ( int j = 0 ; j < value_count ; ++j ) { local.value[j] = 0 ; }

-      for ( ; i < end ; ++i ) {
-        const int ielem      = m_node_elem( i , 0 );
-        const int ielem_node = m_node_elem( i , 1 );
-        for ( int j = 0 ; j < value_count ; ++j ) {
-          local.value[j] += m_elem_value( ielem , ielem_node , j );
-        }
-      }
+      {
+        // nodes' element ids span [i,end)
+        int i = m_node_scan( inode );
+        const int end = m_node_scan( inode + 1 );
+
+        for ( ; i < end ; ++i ) {
+          // element #ielem , local node #ielem_node is this node:
+          const int ielem      = m_node_elem( i , 0 );
+          const int ielem_node = m_node_elem( i , 1 );
+
+          // Sum the vector-valued quantity
+          for ( int j = 0 ; j < value_count ; ++j ) {
+            local.value[j] += m_elem_value( ielem , ielem_node , j );
+          }
+        }
+      }

+      // Assign nodal quantity (no race condition).
+      // Sum global value.
       for ( int j = 0 ; j < value_count ; ++j ) {
-        m_node_value( inode , j ) += local.value[j] ;
+        m_node_value( inode , j ) = local.value[j] ;
         update.value[j] += local.value[j] ;
       }
     }
     else {
       // Already used atomic update of the nodal quantity,
       // query and sum the value.
       for ( int j = 0 ; j < value_count ; ++j ) {
         update.value[j] += m_node_value( inode , j );
       }
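This operator() is the gather-sum alternative to the atomic scatter-add: parallelize over nodes instead of elements, so each nodal value has exactly one writer and the result is deterministic. A serial sketch of the idea in plain C++, using a hand-built node-to-element map for the same hypothetical 1D mesh as the earlier sketch:

    #include <vector>
    #include <cstdio>

    int main()
    {
      // Toy 1D mesh again: elements (0,1) (1,2) (2,3) (3,4).
      const int node_count = 5 ;
      std::vector<double> elem_value( 4 , 0.5 );   // value apportioned per element
      std::vector<double> node_value( node_count , 0.0 );

      // Hand-built node->element map (what map_node_to_elem constructs):
      const int node_scan[6] = { 0 , 1 , 3 , 5 , 7 , 8 };
      const int node_elem[8] = { 0 , 0 , 1 , 1 , 2 , 2 , 3 , 3 };

      for ( int n = 0 ; n < node_count ; ++n ) {   // one writer per node
        for ( int k = node_scan[n] ; k < node_scan[n+1] ; ++k ) {
          node_value[n] += elem_value[ node_elem[k] ];   // gather: reads only
        }
      }

      for ( int n = 0 ; n < node_count ; ++n )
        std::printf( "node %d : %.2f\n" , n , node_value[n] );  // 0.5 1 1 1 0.5
      return 0 ;
    }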
@@ -409,11 +415,68 @@ struct LumpElemToNode {
   KOKKOS_INLINE_FUNCTION
   void join( volatile value_type & update ,
              volatile const value_type & input ) const
   {
     for ( int j = 0 ; j < value_count ; ++j ) { update.value[j] += input.value[j] ; }
   }
-
-  //----------------------------------------
 };

+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+template< class ViewElemNode ,
+          class ViewNodeScan ,
+          class ViewNodeElem >
+void map_node_to_elem( const ViewElemNode & elem_node ,
+                       const ViewNodeScan & node_scan ,
+                       const ViewNodeElem & node_elem )
+{
+  const typename ViewElemNode::HostMirror host_elem_node =
+    Kokkos::create_mirror_view( elem_node );
+  const typename ViewNodeScan::HostMirror host_node_scan =
+    Kokkos::create_mirror_view( node_scan );
+  const typename ViewNodeElem::HostMirror host_node_elem =
+    Kokkos::create_mirror_view( node_elem );
+
+  const int elem_count      = host_elem_node.dimension_0();
+  const int elem_node_count = host_elem_node.dimension_1();
+  const int node_count      = host_node_scan.dimension_0() - 1 ;
+
+  const View< int* , Kokkos::Serial > node_elem_count( "node_elem_count" , node_count );
+
+  Kokkos::deep_copy( host_elem_node , elem_node );
+
+  for ( int i = 0 ; i < elem_count ; ++i ) {
+    for ( int j = 0 ; j < elem_node_count ; ++j ) {
+      ++node_elem_count( host_elem_node(i,j) );
+    }
+  }
+
+  for ( int i = 0 ; i < node_count ; ++i ) {
+    host_node_scan(i+1) += host_node_scan(i) + node_elem_count(i);
+    node_elem_count(i) = 0 ;
+  }
+
+  for ( int i = 0 ; i < elem_count ; ++i ) {
+    for ( int j = 0 ; j < elem_node_count ; ++j ) {
+      const int inode  = host_elem_node(i,j);
+      const int offset = host_node_scan(inode) + node_elem_count(inode);
+
+      host_node_elem( offset , 0 ) = i ;
+      host_node_elem( offset , 1 ) = j ;
+
+      ++node_elem_count(inode);
+    }
+  }
+
+  Kokkos::deep_copy( node_scan , host_node_scan );
+  Kokkos::deep_copy( node_elem , host_node_elem );
+}
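map_node_to_elem (moved to the end of the file by this commit) is the classic count / prefix-sum / fill construction of a compressed, CRS-style node-to-element map, done on host mirrors and then deep-copied to the device. A standalone plain-C++ version of the same three passes on a toy mesh:

    #include <vector>
    #include <cstdio>

    // Count + prefix-sum + fill, as in map_node_to_elem (toy 2-node elements).
    int main()
    {
      const int elem_count = 4 , nodes_per_elem = 2 , node_count = 5 ;
      const int elem_node[4][2] = { {0,1} , {1,2} , {2,3} , {3,4} };

      // Pass 1: count the elements attached to each node.
      std::vector<int> count( node_count , 0 );
      for ( int e = 0 ; e < elem_count ; ++e )
        for ( int j = 0 ; j < nodes_per_elem ; ++j ) ++count[ elem_node[e][j] ];

      // Pass 2: prefix-sum the counts into offsets; reset counts for reuse.
      std::vector<int> node_scan( node_count + 1 , 0 );
      for ( int n = 0 ; n < node_count ; ++n ) {
        node_scan[n+1] = node_scan[n] + count[n];
        count[n] = 0 ;
      }

      // Pass 3: fill (element id, local node) pairs at each node's offset.
      std::vector<int> node_elem( 2 * node_scan[node_count] );
      for ( int e = 0 ; e < elem_count ; ++e )
        for ( int j = 0 ; j < nodes_per_elem ; ++j ) {
          const int n = elem_node[e][j];
          const int offset = node_scan[n] + count[n]++ ;
          node_elem[ 2*offset + 0 ] = e ;
          node_elem[ 2*offset + 1 ] = j ;
        }

      for ( int n = 0 ; n < node_count ; ++n )
        std::printf( "node %d : offsets [%d,%d)\n" , n , node_scan[n] , node_scan[n+1] );
      return 0 ;
    }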
 } /* namespace Example */
 } /* namespace Kokkos */
packages/kokkos/example/feint/feint.hpp
@@ -52,13 +52,17 @@
 namespace Kokkos {
 namespace Example {

+/** \brief  Vector valued function to numerically integrate.
+ *
+ *  F(X) = { 1 , x , y , z , x*y , y*z , z*x , x*y*z }
+ *
+ *  Integrates on a unit cube to:
+ *    { 1 , 1/2 , 1/2 , 1/2 , 1/4 , 1/4 , 1/4 , 1/8 }
+ */
 struct MyFunctionType {

   enum { value_count = 8 };

-  // Integral on a unit cube:
-  // { 1 , 1/2 , 1/2 , 1/2 , 1/4 , 1/4 , 1/4 , 1/8 }

   // Evaluate function at coordinate.
   template< typename CoordType , typename ValueType >
   KOKKOS_INLINE_FUNCTION
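The integrals quoted in the new doc comment follow from the product structure of F: each coordinate factor integrates independently over [0,1] and the integral of t over [0,1] is 1/2, so each linear factor contributes 1/2, the bilinear terms give 1/4, and x*y*z gives 1/8. A quick midpoint-rule spot check (plain C++; the midpoint rule is exact for these multilinear integrands):

    #include <cstdio>

    int main()
    {
      const int n = 100 ;
      const double h = 1.0 / n ;
      double s[4] = { 0 , 0 , 0 , 0 };   // integrals of 1 , x , x*y , x*y*z

      for ( int i = 0 ; i < n ; ++i )
      for ( int j = 0 ; j < n ; ++j )
      for ( int k = 0 ; k < n ; ++k ) {
        const double x = (i+0.5)*h , y = (j+0.5)*h , z = (k+0.5)*h ;
        s[0] += h*h*h ;
        s[1] += x * h*h*h ;
        s[2] += x*y * h*h*h ;
        s[3] += x*y*z * h*h*h ;
      }
      std::printf( "1 : %.6f  x : %.6f  xy : %.6f  xyz : %.6f\n" ,
                   s[0] , s[1] , s[2] , s[3] );  // ~ 1 , 0.5 , 0.25 , 0.125
      return 0 ;
    }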
@@ -81,32 +85,47 @@ void feint(
   const unsigned global_elem_ny ,
   const unsigned global_elem_nz )
 {
-  const unsigned mpi_rank = 0 ;
-  const unsigned mpi_size = 1 ;
-
-  static const Kokkos::Example::BoxElemPart::Decompose
-    decompose = Kokkos::Example::BoxElemPart::DecomposeElem ;
-    // DecomposeElem | DecomposeNode ;
-
-  // typedef Kokkos::Example::BoxElemFixture< Device , Kokkos::Example::BoxElemPart::ElemLinear > BoxFixtureType ;
-  typedef Kokkos::Example::BoxElemFixture
-    < Device , Kokkos::Example::BoxElemPart::ElemQuadratic > BoxFixtureType ;
-
-  typedef Kokkos::Example::FiniteElementIntegration
-    < BoxFixtureType , MyFunctionType , UseAtomic > FeintType ;
-
-  typedef Kokkos::Example::LumpElemToNode
-    < typename FeintType::NodeValueType ,
-      typename FeintType::ElemValueType , UseAtomic > LumpType ;
+  //----------------------------------------
+  // Create the unstructured finite element mesh box fixture on the device:
+
+  typedef Kokkos::Example::
+    BoxElemFixture< Device , Kokkos::Example::BoxElemPart::ElemLinear >
+    // BoxElemFixture< Device , Kokkos::Example::BoxElemPart::ElemQuadratic >
+      BoxFixtureType ;
+
+  // MPI distributed parallel domain decomposition of the fixture.
+  // Either by element (DecomposeElem) or by node (DecomposeNode)
+  // with ghosted elements.
+  static const Kokkos::Example::BoxElemPart::Decompose
+    decompose = Kokkos::Example::BoxElemPart::DecomposeElem ;
+    // decompose = Kokkos::Example::BoxElemPart::DecomposeNode ;
+
+  // Not using MPI in this example.
+  const unsigned mpi_rank = 0 ;
+  const unsigned mpi_size = 1 ;

   const BoxFixtureType fixture( decompose , mpi_size , mpi_rank ,
                                 global_elem_nx , global_elem_ny , global_elem_nz );

+  //----------------------------------------
+  // Create and execute the numerical integration functor on the device:
+
+  typedef Kokkos::Example::FiniteElementIntegration
+    < BoxFixtureType , MyFunctionType , UseAtomic > FeintType ;
+
   const FeintType feint( fixture , MyFunctionType() );

   typename FeintType::value_type elem_integral ;

+  // A reduction for the global integral:
   Kokkos::parallel_reduce( fixture.elem_count() , feint , elem_integral );

   if ( elem_integral.error ) {
-    std::cout << "Spatial jacobian error" << std::endl ;
+    std::cout << "An element had a spatial jacobian error" << std::endl ;
     return ;
   }
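Note how UseAtomic is threaded through both functor types as a compile-time bool, so the scatter-add and gather-sum variants are selected with no run-time dispatch. A minimal sketch of the idiom (hypothetical names, not the real driver):

    #include <cstdio>

    // The flag is a template parameter: both instantiations exist side by
    // side, and each contains only its own code path after optimization.
    template< bool UseAtomic >
    void feint_like()   // hypothetical stand-in for Kokkos::Example::feint
    {
      if ( UseAtomic ) { std::printf( "atomic scatter-add variant\n" ); }
      else             { std::printf( "gather-sum variant\n" ); }
    }

    int main()
    {
      feint_like< false >();
      feint_like< true  >();
      return 0 ;
    }

main.cpp below runs exactly this pair of instantiations over the Threads device.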
@@ -116,6 +135,14 @@ void feint(
   }
   std::cout << std::endl ;

+  //----------------------------------------
+  // Create and execute the nodal lumped value projection and reduction functor:
+
+  typedef Kokkos::Example::LumpElemToNode
+    < typename FeintType::NodeValueType ,
+      typename FeintType::ElemValueType , UseAtomic > LumpType ;
+
   const LumpType lump( feint.m_node_lumped ,
                        feint.m_elem_integral ,
                        fixture.elem_node() );
packages/kokkos/example/feint/main.cpp
@@ -9,27 +9,44 @@
 int main()
 {
-  enum { UseAtomic = false };
-
 #if defined( KOKKOS_HAVE_PTHREAD )
   {
-    std::cout << "feint< Threads >" << std::endl ;
-
     std::pair<unsigned,unsigned> use_cores =
       Kokkos::hwloc::get_core_topology();

     // Use 4 cores per NUMA region, unless fewer available
     use_cores.second = std::min( 4u , use_cores.second );

-    // One team of one thread.
-    std::pair<unsigned,unsigned> team_topology( 1 , 1 );
+    // Use 2 cores per team and 1 thread/core:
+    std::pair<unsigned,unsigned> team_topology(
+      use_cores.first * use_cores.second / 2 , 2 );

     Kokkos::Threads::initialize( team_topology );

-    Kokkos::Example::feint< Kokkos::Threads , UseAtomic >();
+    std::cout << "feint< Threads , NotUsingAtomic >" << std::endl ;
+    Kokkos::Example::feint< Kokkos::Threads , false >();
+
+    std::cout << "feint< Threads , UsingAtomic >" << std::endl ;
+    Kokkos::Example::feint< Kokkos::Threads , true >();

     Kokkos::Threads::finalize();
   }
 #endif
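The new Threads setup derives its team topology from hwloc: cap each NUMA region at 4 cores, then run teams of 2 threads with one thread per core, so the team count is total_cores / 2. The arithmetic, worked for a hypothetical 2-region by 8-core machine (standalone C++, no hwloc query):

    #include <algorithm>
    #include <cstdio>
    #include <utility>

    int main()
    {
      // Suppose hwloc reported 2 NUMA regions of 8 cores each (hypothetical).
      std::pair<unsigned,unsigned> use_cores( 2 , 8 );

      // Use 4 cores per NUMA region, unless fewer available:
      use_cores.second = std::min( 4u , use_cores.second );        // (2,4)

      // Use 2 cores per team and 1 thread/core:
      std::pair<unsigned,unsigned> team_topology(
        use_cores.first * use_cores.second / 2 , 2 );              // 4 teams of 2

      std::printf( "%u teams x %u threads\n" ,
                   team_topology.first , team_topology.second );
      return 0 ;
    }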
 #if defined( KOKKOS_HAVE_CUDA )
   {
     std::cout << "feint< Cuda >" << std::endl ;

     const unsigned device_count = Kokkos::Cuda::detect_device_count();

     // Use the last device:
     Kokkos::