David Lerner David Lerner - 2 months ago 25
C++ Question

CUDA thrust zip_iterator tuple transform_reduce

I want to compute $\left| \vec{a} - \vec{b} \right|$ for vectors $\vec{a}$ and $\vec{b}$, where $\left| \vec{x} \right|$ denotes the magnitude of the vector $\vec{x}$. Since this involves taking the square root of the sum of the squares of the differences between corresponding components of the two vectors, it should be a highly parallelizable task. I am using CUDA and Thrust, through Cygwin, on Windows 10. Both CUDA and Thrust are in general working.

The code below compiles and runs (with nvcc), but only because I have commented out three lines toward the bottom of `main`, each of which I think should work but does not. `func::operator()(tup t)` thinks that the arguments I'm passing it are not in fact of type `tup`.

I have also commented out the actual body of the operator, in the interest of making it more likely to at least compile. The operator is supposed to find the squared difference between the elements of the input `tup`. It is passed to `transform_reduce` as the `unary_op` (in this case `func()`); the reduction (`thrust::plus<float>()`) would then add these squared differences, giving me the norm squared of the difference of the vectors.

#include <iostream>
#include <stdlib.h>
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/tuple.h>
#include <thrust/transform_reduce.h>
#include <thrust/iterator/zip_iterator.h>

// Shorthand for the Thrust device vector of floats used for both operands.
typedef thrust::device_vector<float> dvec;
// Iterator type of dvec.
typedef dvec::iterator iter;
// Tuple of two dvec iterators; func below is declared to take and operate on this type.
// NOTE(review): the compiler errors quoted after this listing show the functor
// being handed thrust::device_reference<float> elements, which cannot be
// converted to iter.
typedef thrust::tuple<iter, iter> tup;

// Unary functor meant to be the transform step of thrust::transform_reduce:
// declared to take a tup and return a float.
struct func: public thrust::unary_function<tup, float>
{
// Intended to return the squared difference of the two tuple elements;
// the real body is commented out and a constant is returned instead.
__device__ float operator()(tup t) //difsq
{
// I've commented out these two lines for testing purposes:
// float f = thrust::get<0>(t) - thrust::get<1>(t);
// return f*f;
return 3.14; // placeholder result that does not depend on t
}
};

// Builds two vectors, zips them together, and reads the first zipped element.
// The three Thrust calls that fail to compile are left commented out.
int main()
{
dvec a(40, 4.f); // 40 elements, each 4.0f
dvec b(40, 3.f); // 40 elements, each 3.0f
// Zip iterators pairing a with b, from the beginnings and from the ends.
auto begin = thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin()));
auto end = thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end()));

//these two lines work
thrust::get<0>(begin[0]);
// Prints the first element of a minus the first element of b.
std::cout << thrust::get<0>(begin[0]) - thrust::get<1>(begin[0]);


//these three lines do not
//thrust::transform_reduce(begin, end, func(), 0.0f, thrust::plus<float>());
//func()(begin[0]);
//thrust::transform(begin, end, begin, func());


std::cout << "done" << std::endl;
return 0;
}


I get this error (my program is called `sandbox.cu`):

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin/../include\thrust/detail/tuple.inl(310): error: no instance of constructor "thrust::detail::normal_iterator<Pointer>::normal_iterator [with Pointer=thrust::device_ptr<float>]" matches the argument list
argument types are: (const thrust::device_reference<float>)
detected during:
instantiation of "thrust::detail::cons<HT, TT>::cons(const thrust::detail::cons<HT2, TT2> &) [with HT=iter, TT=thrust::detail::cons<iter, thrust::null_type>, HT2=thrust::device_reference<float>, TT2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]"
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin/../include\thrust/tuple.h(361): here
instantiation of "thrust::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>::tuple(const thrust::detail::cons<U1, U2> &) [with T0=iter, T1=iter, T2=thrust::null_type, T3=thrust::null_type, T4=thrust::null_type, T5=thrust::null_type, T6=thrust::null_type, T7=thrust::null_type, T8=thrust::null_type, T9=thrust::null_type, U1=thrust::device_reference<float>, U2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]"
sandbox.cu(37): here

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin/../include\thrust/detail/tuple.inl(411): error: no instance of constructor "thrust::detail::normal_iterator<Pointer>::normal_iterator [with Pointer=thrust::device_ptr<float>]" matches the argument list
argument types are: (const thrust::device_reference<float>)
detected during:
instantiation of "thrust::detail::cons<HT, thrust::null_type>::cons(const thrust::detail::cons<HT2, thrust::null_type> &) [with HT=iter, HT2=thrust::device_reference<float>]"
(310): here
instantiation of "thrust::detail::cons<HT, TT>::cons(const thrust::detail::cons<HT2, TT2> &) [with HT=iter, TT=thrust::detail::cons<iter, thrust::null_type>, HT2=thrust::device_reference<float>, TT2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]"
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin/../include\thrust/tuple.h(361): here
instantiation of "thrust::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>::tuple(const thrust::detail::cons<U1, U2> &) [with T0=iter, T1=iter, T2=thrust::null_type, T3=thrust::null_type, T4=thrust::null_type, T5=thrust::null_type, T6=thrust::null_type, T7=thrust::null_type, T8=thrust::null_type, T9=thrust::null_type, U1=thrust::device_reference<float>, U2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]"
sandbox.cu(37): here

2 errors detected in the compilation of "C:/cygwin64/tmp/tmpxft_00001a90_00000000-10_sandbox.cpp1.ii".

Answer

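Solved! `tup` should have been `thrust::tuple<float, float>`, not `thrust::tuple<iter, iter>`. Dereferencing a zip iterator yields a tuple of the underlying elements (here, references to `float`, as the `thrust::device_reference<float>` in the error message shows), not a tuple of iterators, so the functor has to accept a tuple of the element types. Full solution: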

#include <iostream>
#include <stdlib.h>
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/tuple.h>
#include <thrust/transform_reduce.h>
#include <thrust/iterator/zip_iterator.h>

// Thrust device vector of floats used for both operands.
using dvec = thrust::device_vector<float>;
// Argument type of func: one float taken from each of the two zipped vectors.
using tup = thrust::tuple<float, float>;

struct func
{
  __device__ float operator()(tup t) //difsq
  {
     float f = thrust::get<0>(t) - thrust::get<1>(t);
     return f*f;
  }
};

// Prints the sum of squared element-wise differences between a and b
// (the squared norm of a - b), followed by "done".
int main()
{
  dvec a(4, 3.f);
  dvec b(4, 2.f);

  // Pair up corresponding elements of a and b.
  const auto first = thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin()));
  const auto last = thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end()));

  // func squares each difference; plus<float> accumulates them starting from 0.
  const float norm_sq = thrust::transform_reduce(first, last, func(), 0.0f, thrust::plus<float>());

  std::cout << norm_sq << '\n';
  std::cout << "done" << std::endl;
  return 0;
}