김시온 김시온 - 2 months ago 22
Apache Configuration Question

Spark mvn compile error: [ERROR] Failed to execute goal org.apache.maven.plugins:maven-compiler-plugin:3.1:compile (default-compile)

I'm learning Spark by following the book 'Spark with Machine learning'. I am using this dependency:

groupId: org.apache.spark
artifactId: spark-core_2.11
version: 2.0.1

JavaApp.java

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.DoubleFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

/**
* A simple Spark app in Java.
*
* Reads purchase records from data/UserPurchaseHistory.csv and prints the number of
* purchases, the number of unique users, the total revenue and the most popular product.
*
* NOTE(review): as posted, this class does not compile; the two map(...) calls flagged
* below are the ones the compiler rejects.
*/
public class JavaApp {

/**
* Runs the analysis on a local Spark context and prints the results to standard output.
*
* @param args command-line arguments (not read by this method)
*/
public static void main(String[] args) {
JavaSparkContext sc = new JavaSparkContext("local[2]", "First Spark App");
// we take the raw data in CSV format and convert it into a set of records of the form (user, product, price)
JavaRDD<String[]> data = sc.textFile("data/UserPurchaseHistory.csv")
.map(new Function<String, String[]>() {
@Override
public String[] call(String s) throws Exception {
return s.split(",");
}
});

// let's count the number of purchases
long numPurchases = data.count();

// let's count how many unique users made purchases (field 0 of each record is taken as the user)
long uniqueUsers = data.map(new Function<String[], String>() {
@Override
public String call(String[] strings) throws Exception {
return strings[0];
}
}).distinct().count();

// let's sum up our total revenue
// NOTE(review): first reported compile error - map(...) requires a Function,
// but a DoubleFunction is passed here.
double totalRevenue = data.map(new DoubleFunction<String[]>() {
@Override
public double call(String[] strings) throws Exception {
// NOTE(review): every return statement below is commented out, so this body
// never returns a value even though the method is declared to return double.
//double ret=Double.parseDouble(strings[2]);
//return ret;
//return Double.parseDouble(strings[2]);
}
}).sum();

// let's find our most popular product
// first we map the data to records of (product, 1) using a PairFunction
// and the Tuple2 class.
// then we call a reduceByKey operation with a Function2, which is essentially the sum function
// NOTE(review): second reported compile error - map(...) requires a Function,
// but a PairFunction is passed here.
List<Tuple2<String, Integer>> pairs = data.map(new PairFunction<String[], String, Integer>() {
@Override
public Tuple2<String, Integer> call(String[] strings) throws Exception {
// field 1 of each record is used as the product key; Tuple2 is constructed as a raw type
return new Tuple2(strings[1], 1);
}
}).reduceByKey(new Function2<Integer, Integer, Integer>() {
@Override
public Integer call(Integer integer, Integer integer2) throws Exception {
return integer + integer2;
}
}).collect();

// finally we sort the result. Note we need to create a Comparator function,
// that reverses the sort order.
Collections.sort(pairs, new Comparator<Tuple2<String, Integer>>() {
@Override
public int compare(Tuple2<String, Integer> o1, Tuple2<String, Integer> o2) {
// negating the difference of the counts puts the largest count first
return -(o1._2() - o2._2());
}
});
// after the descending sort, element 0 holds the product with the highest count
String mostPopular = pairs.get(0)._1();
int purchases = pairs.get(0)._2();

// print everything out
System.out.println("Total purchases: " + numPurchases);
System.out.println("Unique users: " + uniqueUsers);
System.out.println("Total revenue: " + totalRevenue);
System.out.println(String.format("Most popular product: %s with %d purchases",
mostPopular, purchases));

sc.stop();

}

}


my pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the java-spark-app example project. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<!-- Coordinates of this project. -->
<groupId>java-spark-app</groupId>
<artifactId>java-spark-app</artifactId>
<version>1.0</version>
<properties>
<!-- Use UTF-8 for both source files and generated reports. -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
</properties>
<dependencies>
<!-- The only declared dependency: Spark core, version 2.0.1. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.0.1</version>
</dependency>
</dependencies>

</project>


So I compiled the code using Maven, but I cannot resolve the error message below:

[INFO] 2 errors
[INFO] -------------------------------------------------------------
[INFO] ------------------------------------------------------------------------
[INFO] BUILD FAILURE
[INFO] ------------------------------------------------------------------------
[INFO] Total time: 6.785 s
[INFO] Finished at: 2016-10-10T05:17:42+00:00
[INFO] Final Memory: 34M/777M
[INFO] ------------------------------------------------------------------------
[ERROR] Failed to execute goal org.apache.maven.plugins:maven-compiler-plugin:3.1:compile (default-compile) on project java-spark-app: Compilation failure: Compilation failure:
[ERROR] /home/ubuntu/workspace/practice/8519OS_Code/Chapter_01/java-spark-app/src/main/java/JavaApp.java:[40,35] method map in class org.apache.spark.api.java.AbstractJavaRDDLike<T,This> cannot be applied to given types;
[ERROR] required: org.apache.spark.api.java.function.Function<java.lang.String[],R>
[ERROR] found: <anonymous org.apache.spark.api.java.function.DoubleFunction<java.lang.String[]>>
[ERROR] reason: cannot infer type-variable(s) R
[ERROR] (argument mismatch; <anonymous org.apache.spark.api.java.function.DoubleFunction<java.lang.String[]>> cannot be converted to org.apache.spark.api.java.function.Function<java.lang.String[],R>)
[ERROR] /home/ubuntu/workspace/practice/8519OS_Code/Chapter_01/java-spark-app/src/main/java/JavaApp.java:[53,51] method map in class org.apache.spark.api.java.AbstractJavaRDDLike<T,This> cannot be applied to given types;
[ERROR] required: org.apache.spark.api.java.function.Function<java.lang.String[],R>
[ERROR] found: <anonymous org.apache.spark.api.java.function.PairFunction<java.lang.String[],java.lang.String,java.lang.Integer>>
[ERROR] reason: cannot infer type-variable(s) R
[ERROR] (argument mismatch; <anonymous org.apache.spark.api.java.function.PairFunction<java.lang.String[],java.lang.String,java.lang.Integer>> cannot be converted to org.apache.spark.api.java.function.Function<java.lang.String[],R>)
[ERROR] -> [Help 1]
[ERROR]
[ERROR] To see the full stack trace of the errors, re-run Maven with the -e switch.
[ERROR] Re-run Maven using the -X switch to enable full debug logging.
[ERROR]
[ERROR] For more information about the errors and possible solutions, please read the following articles:
[ERROR] [Help 1] http://cwiki.apache.org/confluence/display/MAVEN/MojoFailureException


This problem has been troubling me for 4 days.
Please help me :(

Answer

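The code sample you are referring to might be using an old version of Spark. As the compiler error says, in the version you are building against `map` only accepts a `Function`; a `DoubleFunction` has to be passed to `mapToDouble` and a `PairFunction` to `mapToPair`. I have modified the code you posted accordingly, and it works fine with the latest version of Spark. Try to build and run the code below.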

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.DoubleFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;

import scala.Tuple2;

/**
 * A simple Spark app in Java.
 *
 * <p>Reads purchase records of the form (user, product, price) from
 * {@code data/UserPurchaseHistory.csv} and prints the number of purchases, the number of
 * unique users, the total revenue and the most popular product.
 */
public class JavaApp {

    /**
     * Runs the analysis on a local Spark context and prints the results to standard output.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext("local[2]", "First Spark App");
        try {
            // we take the raw data in CSV format and convert it into a set of records
            // of the form (user, product, price)
            JavaRDD<String[]> data = sc.textFile("data/UserPurchaseHistory.csv")
                    .map(new Function<String, String[]>() {
                        @Override
                        public String[] call(String line) throws Exception {
                            return line.split(",");
                        }
                    });

            // let's count the number of purchases
            long numPurchases = data.count();

            // let's count how many unique users made purchases
            long uniqueUsers = data.map(new Function<String[], String>() {
                @Override
                public String call(String[] fields) throws Exception {
                    return fields[0];
                }
            }).distinct().count();

            // let's sum up our total revenue; a DoubleFunction must go through mapToDouble,
            // because map only accepts a Function
            double totalRevenue = data.mapToDouble(new DoubleFunction<String[]>() {
                @Override
                public double call(String[] fields) throws Exception {
                    return Double.parseDouble(fields[2]);
                }
            }).sum();

            // let's find our most popular product
            // first we map the data to records of (product, 1) using a PairFunction
            // (which must go through mapToPair) and the Tuple2 class.
            // then we call a reduceByKey operation with a Function2, which is essentially
            // the sum function
            List<Tuple2<String, Integer>> pairs = data.mapToPair(
                    new PairFunction<String[], String, Integer>() {
                        @Override
                        public Tuple2<String, Integer> call(String[] fields) throws Exception {
                            return new Tuple2<String, Integer>(fields[1], 1);
                        }
                    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                        @Override
                        public Integer call(Integer left, Integer right) throws Exception {
                            return left + right;
                        }
                    }).collect();

            // finally we sort the result in descending order of purchase count.
            // The sort is done on a mutable copy that we keep a reference to, and the
            // winner is read from that same copy; sorting an anonymous copy and then
            // reading from the original list would return an arbitrary product.
            List<Tuple2<String, Integer>> sorted = new ArrayList<Tuple2<String, Integer>>(pairs);
            Collections.sort(sorted, new Comparator<Tuple2<String, Integer>>() {
                @Override
                public int compare(Tuple2<String, Integer> o1, Tuple2<String, Integer> o2) {
                    // arguments swapped to reverse the order; Integer.compare avoids the
                    // overflow risk of subtracting the counts
                    return Integer.compare(o2._2(), o1._2());
                }
            });

            // print everything out
            System.out.println("Total purchases: " + numPurchases);
            System.out.println("Unique users: " + uniqueUsers);
            System.out.println("Total revenue: " + totalRevenue);
            if (sorted.isEmpty()) {
                // no records at all: there is no most popular product to report
                System.out.println("Most popular product: none (no purchases found)");
            } else {
                String mostPopular = sorted.get(0)._1();
                int purchases = sorted.get(0)._2();
                System.out.println(String.format("Most popular product: %s with %d purchases",
                        mostPopular, purchases));
            }
        } finally {
            // always release the Spark context, even if one of the jobs above failed
            sc.stop();
        }
    }
}