
java.lang.NumberFormatException: For input string: "0.89829948"

I want to take the count of each key, like in the word-count problem, but without introducing an IntWritable 1 as the value. I'm getting the following error:

Error: java.lang.ClassCastException: org.apache.hadoop.io.LongWritable cannot be cast to org.apache.hadoop.io.FloatWritable
at yarn1$MapForWordCount.map(yarn1.java:1)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:784)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:163)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)


Input file is in the following form:

Alert,NU,2009-01-05,605280,1852,2.775335867,0.119936138,0.183023134,0.89829948,0.047442672,0.079679499,,,0.017733688,,,0.014081354,,,1.402004389
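
For reference, a standalone check (hypothetical, not part of the job) shows why parsing this line can fail: split(",") turns the consecutive commas into empty strings, and Float.parseFloat on an empty string throws a NumberFormatException. (The exception in the title is what Integer.parseInt("0.89829948") would raise, since that string is not a valid integer.)

public class SplitCheck {
    public static void main(String[] args) {
        String line = "Alert,NU,2009-01-05,605280,1852,2.775335867,0.119936138,"
                + "0.183023134,0.89829948,0.047442672,0.079679499,,,0.017733688,"
                + ",,0.014081354,,,1.402004389";
        String[] words = line.split(",");
        System.out.println(Float.parseFloat(words[8])); // words[8] = "0.89829948" parses fine as a float
        System.out.println("[" + words[11] + "]");      // [] an empty interior field is kept by split
        Float.parseFloat(words[11]);                    // throws NumberFormatException: empty String
    }
}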


My code:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class yarn1 {

    public static void main(String[] args) throws Exception {
        Configuration c = new Configuration();
        Job job = Job.getInstance(c, "word count");
        job.setJarByClass(yarn1.class);
        job.setMapperClass(MapForWordCount.class);
        job.setReducerClass(ReduceForWordCount.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FloatWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    public static class MapForWordCount extends Mapper<FloatWritable, Text, Text, FloatWritable> {

        public void map(FloatWritable key, Text value, Context con) throws IOException, InterruptedException {
            String line = value.toString();
            String[] words = line.split(",");
            Text outputKey = new Text(words[0]);
            FloatWritable outputValue = new FloatWritable(Float.parseFloat(words[8]));

            //IntWritable outputValue = new IntWritable(1);
            con.write(outputKey, outputValue);
        }
    }

    public static class ReduceForWordCount extends Reducer<Text, FloatWritable, Text, FloatWritable> {

        public void reduce(Text word, Iterable<FloatWritable> values, Context con) throws IOException, InterruptedException {
            int sum = 0;
            for (FloatWritable value : values) {
                //sum += value.get();
                sum = sum + 1;
            }
            con.write(word, new FloatWritable(sum));
        }
    }
}


Sample output:

Alert 394


I'm fairly new to Hadoop MR so any help would be appreciated.

Answer

You have to change the mapper's input key type to LongWritable. With the default TextInputFormat, the framework hands each map call the line's byte offset as a LongWritable key, which is why the cast to FloatWritable fails. You also need a try/catch around the parsing, because empty fields in the input throw a NumberFormatException. Remember to import org.apache.hadoop.io.LongWritable.

Code with modifications:

public static class MapForWordCount extends Mapper<LongWritable, Text, Text, FloatWritable> {

    public void map(LongWritable key, Text value, Context con) throws IOException, InterruptedException {
        String line = value.toString();
        String[] words = line.split(",");

        // Guard against short lines and empty keys before parsing.
        if (words.length > 8 && words[0].length() > 0) {
            Text outputKey = new Text(words[0]);
            try {
                FloatWritable outputValue = new FloatWritable(Float.parseFloat(words[8]));
                con.write(outputKey, outputValue);
            } catch (NumberFormatException e) {
                // Skip records whose ninth field is empty or not a valid float.
            }
        }
    }
}
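
If you also want the output to be an integer count like the sample output (Alert 394), a minimal sketch (my suggestion, not part of the original answer) is to keep FloatWritable as the map output value but emit an IntWritable from the reducer. Because the map output types then differ from the final output types, the driver has to declare them separately:

// In main(), replacing the two setOutput*Class calls
// (requires import org.apache.hadoop.io.IntWritable):
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(FloatWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);

public static class ReduceForWordCount extends Reducer<Text, FloatWritable, Text, IntWritable> {

    public void reduce(Text word, Iterable<FloatWritable> values, Context con) throws IOException, InterruptedException {
        int count = 0;
        for (FloatWritable value : values) {
            count++; // count occurrences of the key; the float value itself is ignored
        }
        con.write(word, new IntWritable(count));
    }
}

Hadoop defaults the map output key/value classes to the job output classes, so the explicit setMapOutput*Class calls are only needed when the two differ, as they do here.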