Gonzo Gonzo - 3 years ago 159
R Question

Extending Rcpp function to input vector of any type

I have following function which does a simple loop on

NumericVector
and returns
int
type value.

Rcpp::cppFunction({'
int calc_streak( NumericVector x, int i1, int i2){
int cur_streak=1;

if (NumericVector::is_na(x[0])){
cur_streak = NumericVector::get_na();
} else {
cur_streak = 1;
}

for(int j = i1; j <= i2 ; ++j) {
if( x[ j ] == x[ j-1 ]){
cur_streak += 1;

} else if(NumericVector::is_na( x[ j ] )){
cur_streak = NumericVector::get_na();

} else {
cur_streak = 1;

}
}
return cur_streak;
}
"})

calc_streak(c(1,1,1,1),i1=0,i2=3)
# [1] 4


Function works fine for me but the real issue is when I'm trying to extend this functionality on other input-types. I've been searching on stack here and here, but those examples doesn't work in my case or I don't know how to use examples properly. I've tried few methods dealing with unknown input type, and none was successful in my case.
Three examples below





  1. The simplest one inspired by this - created main function which run one of previously defined functions depending on type of argument
    TYPEOF(x)
    . This function returns expected value for
    integer
    and
    numeric
    . For
    character
    session crashes


    Rcpp::cppFunction('
    #include <Rcpp.h>
    using namespace Rcpp;

    int streak_run_int(IntegerVector x, int i1, int i2){
    int cur_streak=1;

    if (IntegerVector::is_na(x[0])){
    cur_streak = NumericVector::get_na();
    } else {
    cur_streak = 1;
    }

    for(int j = i1; j <= i2 ; ++j) {
    if( x[ j ] == x[ j-1 ]){
    cur_streak += 1;

    } else if(IntegerVector::is_na( x[ j ] )){
    cur_streak = NumericVector::get_na();

    } else {
    cur_streak = 1;

    }
    }
    return cur_streak;
    }

    int streak_run_char(CharacterVector x, int i1, int i2){
    int cur_streak=1;

    if (CharacterVector::is_na(x[0])){
    cur_streak = NumericVector::get_na();
    } else {
    cur_streak = 1;
    }

    for(int j = i1; j <= i2 ; ++j) {
    if( x[ j ] == x[ j-1 ]){
    cur_streak += 1;

    } else if(CharacterVector::is_na( x[ j ] )){
    cur_streak = NumericVector::get_na();

    } else {
    cur_streak = 1;

    }
    }
    return cur_streak;
    }


    // [[Rcpp::export]]
    int streak_run4(SEXP x, int i1, int i2) {
    switch (TYPEOF(x)) {
    case INTSXP: {
    return streak_run_int(as<IntegerVector>(x), i1, i2);
    }
    case STRSXP: {
    return streak_run_char(as<CharacterVector>(x), i1, i2);
    }
    default: { return 0; }
    }
    }
    ')

    # expected results for int and real - for character session crashes
    streak_run4( c(1,1,1,1),i1=0, i2=3)
    streak_run4( as.integer(c(1,1,1,1)),i1=0, i2=3)
    streak_run4( as.character(c(1,1,1,1)),i1=0, i2=3)







  1. Second function has exactly the same idea, but using template instead of defining multiple functions. Same results as above - session crash on
    character
    input


    Rcpp::cppFunction('
    #include <Rcpp.h>
    using namespace Rcpp;

    namespace impl {

    template <int RTYPE>
    int streak_run_impl(const Vector<RTYPE>& x, int i1, int i2)
    {
    int cur_streak=1;

    if (Vector<RTYPE>::is_na(x[0])){
    cur_streak = NumericVector::get_na();
    } else {
    cur_streak = 1;
    }

    for(int j = i1; j <= i2 ; ++j) {
    if( x[ j ] == x[ j-1 ]){
    cur_streak += 1;

    } else if(Vector<RTYPE>::is_na( x[ j ] )){
    cur_streak = NumericVector::get_na();

    } else {
    cur_streak = 1;

    }
    }
    return cur_streak;
    }

    }

    // [[Rcpp::export]]
    int streak_run3(SEXP x, int i1, int i2) {
    switch (TYPEOF(x)) {
    case INTSXP: {
    return impl::streak_run_impl(as<IntegerVector>(x), i1, i2);
    }
    case REALSXP: {
    return impl::streak_run_impl(as<NumericVector>(x), i1, i2);
    }
    case STRSXP: {
    return impl::streak_run_impl(as<CharacterVector>(x), i1, i2);
    }
    case LGLSXP: {
    return impl::streak_run_impl(as<LogicalVector>(x), i1, i2);
    }
    case CPLXSXP: {
    return impl::streak_run_impl(as<ComplexVector>(x), i1, i2);
    }
    default: {
    return 0;
    }
    }
    }
    ')

    streak_run3( c(1,1,1,1),i1=0, i2=3)
    streak_run3( as.integer(c(1,1,1,1)),i1=0, i2=3)
    streak_run3( as.character(c(1,1,1,1)),i1=0, i2=3)







  1. Another one is inspired by this article, and this time I wasn't even able to compile C++ function, while having an error
    use of overloaded operator '==' is ambiguous
    . Anyway, after examining two above examples, I don't expect any other result.

    Rcpp::cppFunction('
    #include <Rcpp.h>
    using namespace Rcpp;

    class streak_run2_impl {
    private:
    int i1;
    int i2;

    public:
    streak_run2_impl(int i1, int i2) : i1(i1), i2(i2) {}

    template <int RTYPE>
    IntegerVector operator()(const Vector<RTYPE>& x)
    {

    int cur_streak=1;

    if (Vector<RTYPE>::is_na(x[0])){
    cur_streak = NumericVector::get_na();
    } else {
    cur_streak = 1;
    }

    for(int j = i1; j <= i2 ; ++j) {
    if( x[ j ] == x[ j-1 ] ){
    cur_streak += 1;

    } else if(Vector<RTYPE>::is_na( x[ j ] )){

    cur_streak = NumericVector::get_na();

    } else {
    cur_streak = 1;
    }
    }
    return cur_streak;
    }
    };


    // [[Rcpp::export]]
    RObject streak_run2(RObject x, int i1 = 0, int i2=6){
    RCPP_RETURN_VECTOR(streak_run2_impl(i1, i2), x);
    }
    ')






So my question is:
How to properly define this function to obtain results for input vector of any R class?

I would be obliged for any advices.

Answer Source

I think the main error in examples are that you start your loop at j = 0 so you call operator[](-1). The following works for me. Make the following func.cpp

#include <Rcpp.h>
#include <algorithm>
using namespace Rcpp;

template <int RTYPE>
int streak_run_impl(const Vector<RTYPE>& x, int i1, int i2)
{
  int cur_streak = 1;

  if (Vector<RTYPE>::is_na(x[0])){
    cur_streak = NA_INTEGER;
  } else {
    cur_streak = 1;
  }

  for(int j = std::max(i1, 1) /* have to start at one at least */; 
      j < std::min(i2 + 1, (int)x.size()) /* check size of x */; ++j){
    if(x[j] == x[j - 1]){
      cur_streak += 1;

    } else if(Vector<RTYPE>::is_na(x[j])){
      cur_streak = NA_INTEGER;

    } else {
      cur_streak = 1;

    }
  }
  return cur_streak;
}

// [[Rcpp::export]]
int streak_run3(SEXP x, int i1, int i2) {
  switch (TYPEOF(x)) {
    case INTSXP: {
      return streak_run_impl(as<IntegerVector>(x), i1, i2);
    }
    case REALSXP: {
      return streak_run_impl(as<NumericVector>(x), i1, i2);
    }
    case STRSXP: {
      return streak_run_impl(as<CharacterVector>(x), i1, i2);
    }
    case LGLSXP: {
      return streak_run_impl(as<LogicalVector>(x), i1, i2);
    }
    case CPLXSXP: {
      return streak_run_impl(as<ComplexVector>(x), i1, i2);
    }
    default: {
      return 0;
    }
  }
}

Then run this R script with the working directory set to that of the .cpp file

Rcpp::sourceCpp("func.cpp")

streak_run3(c(1,1,1,1), i1=0, i2=3)
streak_run3(as.integer(c(1,1,1,1)), i1=0, i2=3)
streak_run3(as.character(c(1,1,1,1)), i1=0, i2=3)
Recommended from our users: Dynamic Network Monitoring from WhatsUp Gold from IPSwitch. Free Download