更新时间:2022-10-04 20:26:55
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.MultipleOutputFormat;
import org.apache.hadoop.util.Progressable;
// NOTE(review): this looks like a wrong auto-import — the Hadoop code in this
// file needs org.apache.hadoop.io.Text, not the DOM interface. The class below
// fully qualifies the Hadoop Text to avoid resolving to this import.
import org.w3c.dom.Text;
public class MultipleOutputFormatTest extends MultipleOutputFormat<Text, IntWritable>{
protected String generateFileNameForKeyValue(Text key, IntWritable value, Configuration conf) {
char c = key.toString().toLowerCase().charAt( 0 );
if (c >= 'a' && c <= 'z' ){
return c + ".txt" ;
}
return "other.txt" ;
}
@Override
protected RecordWriter<Text, IntWritable> getBaseRecordWriter(
FileSystem fs, JobConf job, String name, Progressable arg3)
throws IOException {
// TODO Auto-generated method stub
return null ;
}
} |
在教程当中只需要重写 generateFileNameForKeyValue 就能达到按 key 分文件输出的效果;
但是在实践当中还需要重写另一个方法 getBaseRecordWriter:
它负责返回真正把每条记录写入文件的 RecordWriter,通常直接委托给
TextOutputFormat 等基础输出格式;如果只返回 null,写出时会抛出空指针异常。
使用时通过 conf.setOutputFormat(MultipleOutputFormatTest.class) 设定输出格式(conf 为 JobConf)。