Team reduction for array type
Created by: kyungjoo-kim
Currently, team reduction is designed for scalar types only. I need a team reduction that works on arrays, where each thread has its own (thread-specific) array. The input array may already hold initial values.
/// Team-level reduction over a TeamThreadRange using a caller-supplied join.
///
/// @param loop_boundaries  Supplies `start`, `end`, `increment` and the
///                         `thread` (team member) handle used for the final
///                         team-wide combine.
/// @param lambda           Called as `lambda(i, value)` for each index visited;
///                         `value` is a freshly value-initialized ValueType.
/// @param join             Called as `join(dst, src)` to fold one
///                         contribution into the running accumulator.
/// @param init_result      In/out: its incoming value seeds the local
///                         accumulator and it is overwritten with whatever
///                         `team_reduce` returns.
template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>& loop_boundaries,
                     const Lambda & lambda,
                     const JoinType& join,
                     ValueType& init_result)
{
  typedef Impl::JoinLambdaAdapter<ValueType,JoinType> JoinAdapter;

  // Start the accumulator from the caller-provided value.
  ValueType accum = init_result;

  for( iType idx = loop_boundaries.start;
       idx < loop_boundaries.end;
       idx += loop_boundaries.increment )
  {
    // TODO: this per-iteration temporary should be reconsidered in favour of
    // a thread-local array once array-valued reductions are supported.
    ValueType contribution = ValueType();

    lambda( idx, contribution );
    join( accum, contribution );
  }

  // Hand the accumulated value to the team member's team_reduce, wrapping the
  // user join in the adapter type that call is given here.
  // NOTE(review): presumably this combines the per-thread partial results
  // across the team -- confirm against team_reduce's definition.
  init_result = loop_boundaries.thread.team_reduce( accum, JoinAdapter( join ) );
}