Hadoop Development Environment in Eclipse

The basic steps are:

Create a Java project.
In the build path, add the Hadoop jar files, which come bundled when you download the Hadoop binary from the Apache website.
It is also suggested that you link the Javadoc and source files for these jars as well; a quick way to verify the build path is sketched below.
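To confirm the jars are correctly on the build path, a minimal sanity check like the one below should compile and print the Hadoop version (BuildPathCheck is just an illustrative name, not part of any Hadoop example):

import org.apache.hadoop.util.VersionInfo;

public class BuildPathCheck {
    public static void main(String[] args) {
        // If the Hadoop jars are correctly on the build path,
        // this compiles and prints the bundled Hadoop version.
        System.out.println("Hadoop version: " + VersionInfo.getVersion());
    }
}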

Now create a sample program based on the famous WordCount example of Hadoop.

The example in the official documentation for version 0.20.203 is based on the old API; for an example that uses the newer API, see the link below:

http://shuyo.wordpress.com/2011/03/08/hadoop-development-environment-with-eclipse

That post also shows detailed steps to set up Eclipse.
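For comparison, here is a rough sketch of how the mapper looks in the old API (org.apache.hadoop.mapred), which implements the Mapper interface and writes through an OutputCollector instead of a Context:

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class OldApiMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, IntWritable> {

    private static final IntWritable one = new IntWritable(1);
    private Text word = new Text();

    // Old API: results go through an OutputCollector rather than a Context.
    public void map(LongWritable key, Text value,
            OutputCollector<Text, IntWritable> output, Reporter reporter)
            throws IOException {
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            output.collect(word, one);
        }
    }
}

The sample below, by contrast, uses the new org.apache.hadoop.mapreduce API throughout.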

Below is sample code you can use. Just create a folder named input (with one or more text files in it) inside your project workspace folder, then click Run:

package com.hadoop;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount1 {

    public static class SimpleMapper extends Mapper<Object, Text, Text, IntWritable> {

        private static final IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split each input line into tokens and emit (word, 1) per token.
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class SampleReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Sum all the counts emitted for this word.
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        GenericOptionsParser g = new GenericOptionsParser(conf, args);
        // Remaining command-line arguments are unused here, since the
        // input and output paths are hardcoded below.
        String[] otherArgs = g.getRemainingArgs();

        Job job = new Job(conf, "Example Hadoop 0.20.1 WordCount");
        job.setJarByClass(WordCount1.class);
        job.setMapperClass(SimpleMapper.class);
        job.setReducerClass(SampleReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Paths are relative to the project workspace folder; the "output"
        // directory must not already exist when the job starts.
        FileInputFormat.addInputPath(job, new Path("input"));
        FileOutputFormat.setOutputPath(job, new Path("output"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
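As a quick check, suppose the input folder contains a single text file with these two lines (a made-up example):

hello world
hello hadoop

After the job finishes, output/part-r-00000 should contain the counts, tab-separated and sorted by word:

hadoop	1
hello	2
world	1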

