使用Hadoop计算气象数据文档格式.docx-资源下载

使用Hadoop计算气象数据文档格式.docx

1、 if （airTemperature != MISSING & quality.matches（01459） context.write（new Text（year）, new IntWritable（airTemperature）;MinTemperatureReducer.javaimport org.apache.hadoop.mapreduce.Reducer;public class MinTemperatureReducer extends Reducer protected void reduce（Text key, Iterable values, Context conte

2、xt） throws IOException, InterruptedException int minValue = Integer.MAX_VALUE; for （IntWritable value : values） minValue = Math.min（value.get（）, minValue）; context.write（key, new IntWritable（minValue）;MinTemperature.javaimport org.apache.hadoop.fs.Path;import org.apache.hadoop.mapreduce.Job;import o

3、rg.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;public class MinTemperature public static void main（String args） throws Exception if （args.length != 2） System.err.println（Usage: MinTemperature ）; System.exit（-1）; Job job = new Job（）

4、; job.setJarByClass（MinTemperature.class）; FileInputFormat.addInputPath（job, new Path（args0）; FileOutputFormat.setOutputPath（job, new Path（args1）; job.setMapperClass（MinTemperatureMapper.class）; job.setReducerClass（MinTemperatureReducer.class）; job.setOutputKeyClass（Text.class）; job.setOutputValueCl

5、ass（IntWritable.class）; System.exit（job.waitForCompletion（true）?0:1）;编译：生成jar包运行mr任务查看结果;2 在linux或win下安装eclipse，并且连接到Hadoop集群（关键步骤是编译插件），运行习题1的map-reduce程序作为测试，抓图整个过程。由于我的hadoop集群是安装在虚拟机上的，windows本地无相应的hadoop程序运行，因此在配置时需要将windows的用户名修改为相应的安装hadoop应用的用户名，我这里为bigdata1x，修改完成后重启windows系统，即可进行eclipse下的had

6、oop插件安装（如果不修改用户名与hadoop集群的用户名一致，后续连接到hadoop集群调试时会出错）修改eclipse的hadoop的配置信息并修改hadoop.tmp.dir参数配置好后点击切换到Map/Reduce视图，可看到DFS上的目录结构如下：以上data目录即为作业中题目1对应的数据创建相应的Map/Reduce工程将题目1中的代码增加到工程中配置相应的运行参数：再通过hadoop运行，报如下错误：15/04/25 19:39:05 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your

7、 platform... using builtin-java classes where applicable 15/04/25 19:39:05 ERROR security.UserGroupInformation: PriviledgedActionException as:bigdata1x cause:java.io.IOException: Failed to set permissions of path: \tmp\hadoop-bigdata1x\mapred\staging\bigdata1x1095384666\.staging to 0700 Exception in thread "main" java.io.IOEx

8、ception: at org.apache.hadoop.fs.FileUtil.checkReturnValue（FileUtil.java:691） at org.apache.hadoop.fs.FileUtil.setPermission（FileUtil.java:664） at org.apache.hadoop.fs.RawLocalFileSystem.setPermission（RawLocalFileSystem.java:514） at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs（RawLocalFileSystem.j

9、ava:349） at org.apache.hadoop.fs.FilterFileSystem.mkdirs（FilterFileSystem.java:193） at org.apache.hadoop.mapreduce.JobSubmissionFiles.getStagingDir（JobSubmissionFiles.java:126） at org.apache.hadoop.mapred.JobClient$2.run（JobClient.java:942） at org.apache.hadoop.mapred.JobClient$2.run（JobClient.java:936） at java.security.AccessController.doPrivileged（Native M

10、ethod） at javax.security.auth.Subject.doAs（Subject.java:415） at org.apache.hadoop.security.UserGroupInformation.doAs（UserGroupInformation.java:1190） at org.apache.hadoop.mapred.JobClient.submitJobInternal（JobClient.java:936） at org.apache.hadoop.mapreduce.Job.submit（Job.java:550） at org.apache.hadoop.ma

11、preduce.Job.waitForCompletion（Job.java:580） at dataguru.lesson4.e2.MinTemperature.main（MinTemperature.java:27）通过网上查询，在主程序中增加如下内容，参见：如果配置了以上参数还报相同的错，则需要检查eclipse中hadoop.tmp.dir、mapred.local.dir配置的是否正确。以上如果不增加mapred.jar参数配置，则对应的mapreduce程序不能将所有的class都提交到hadoop集群中，会抛出mapper、reducer的Class Not Found异常。程序正常

12、运行日志如下：查看运行结果：第3-4题为2选1（请在课程资源下载第6周作业素材和视频素材），如能2题均完成为最佳。3 传递参数问题请阅读Exercise_1.java，编译并且运行。该程序从Test_1改编而来，其主要差别在于能够让用户在结果文件中的每一行前面添加一个用户自定义的字符串，而这个字符串将由参数传递到程序中。例如，运行 $hadoop jar Exercise_1.jar input_path output_path hadoop 之后，第三个参数“hadoop”将会在结果文件中显示，例如附件“result_1”所显示的。问题：着重考虑Exercise_1.java里面”需要注

13、意的部分”，改写Test_2程序，得到的结果必须跟附件result_2一致，并且其中hadoop必须由参数传递。改写Test_2程序的代码内容如下：package dataguru.lesson4.e3;/* * Hadoop网络课程模板程序 * 编写者：James */import java.text.DateFormat;import java.text.SimpleDateFormat;import java.util.Date;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Config

14、ured;import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;/* * 有Reducer版本public class Test_2_m extends Configured implements Tool /* * 计数器用于计数各种异常数据 enum Counter LINESKIP, / 出错的行 * MAP任务 public static class Map ex

15、tends Mapper public void map（LongWritable key, Text value, Context context） throws IOException, InterruptedException String line = value.toString（）; / 读取源数据 try / 数据处理 String lineSplit = line.split（ String anum = lineSplit0; String bnum = lineSplit1; context.write（new Text（bnum）, new Text（anum）; / 输

16、出 catch （java.lang.ArrayIndexOutOfBoundsException e） context.getCounter（Counter.LINESKIP）.increment（1）; / 出错令计数器+1 return; * REDUCE任务 public static class Reduce extends Reducer private String name; Override protected void setup（Context context） throws IOException, InterruptedException this.name = co

17、ntext.getConfiguration（）.get（name public void reduce（Text key, Iterable values, Context context） String valueString; String out = ; for （Text value : valueString = value.toString（）; out += valueString + | if （mons.lang.StringUtils.isNotEmpty（this.name） out+=this.name; context.write（key, new Text（out

18、）; public int run（String args） throws Exception Configuration conf = getConf（）; conf.set（mapred.job.tracker, 192.168.2.103:9001mapred.jarD:Hadooplesson4Test_2_m.jar if （args.length = 3） conf.set（, args2）; Job job = new Job（conf, Test_2 / 任务名 job.setJarByClass（Test_2_m.class）; / 指定Class FileInputForm

19、at.addInputPath（job, new Path（args0）; / 输入路径 FileOutputFormat.setOutputPath（job, new Path（args1）; / 输出路径 job.setMapperClass（Map.class）; / 调用上面Map类作为Map任务代码 job.setReducerClass（Reduce.class）; / 调用上面Reduce类作为Reduce任务代码 job.setOutputFormatClass（TextOutputFormat.class）; job.setOutputKeyClass（Text.class）

20、; / 指定输出的KEY的格式 job.setOutputValueClass（Text.class）; / 指定输出的VALUE的格式 job.waitForCompletion（true）; / 输出任务完成情况 System.out.println（任务名称： + job.getJobName（）;任务成功： + （job.isSuccessful（） ?是 :否）;输入行数： + job.getCounters（） .findCounter（org.apache.hadoop.mapred.Task$Counter, MAP_INPUT_RECORDS）.getValue（）;输出行数

21、：MAP_OUTPUT_RECORDS跳过的行： + job.getCounters（）.findCounter（Counter.LINESKIP）.getValue（）; return job.isSuccessful（） ? 0 : 1; * 设置系统说明设置MapReduce任务 public static void main（String args） throws Exception / 判断参数个数是否正确 / 如果无参数运行则显示以作程序说明 if （args.length ! System.err.println（ Test_2 output path System.err .

22、println（Example: hadoop jar /Test_2.jar hdfs:/localhost:9000/home/james/Test_2 hdfs:9000/home/james/outputCounter:t + LINESKIP + Lines which are too short System.exit（-1）; / 记录开始时间 DateFormat formatter = new SimpleDateFormat（yyyy-MM-dd HH:mm:ss Date start = new Date（）; / 运行任务 int res = ToolRunner.run（new Configuration（）, new Test_2_m（）, args）; / 输出任务耗时 Date end = new Date（）; float time = （float）（end.getTime（） - start.getTime（） / 60000.0）;任务开始： + formatter.f

邮箱/手机：
温馨提示：	快捷下载时，用户名和密码都是您填写的邮箱或者手机号，方便查询和重复下载（系统自动生成）。如填写123，账号就是123，密码也是123。
特别说明：	请自助下载，系统不会自动发送文件的哦；如果您已付费，想二次下载，请登录后访问：我的下载记录
支付方式：
验证码：	换一换

账号：
密码：
验证码：	换一换
当日自动登录忘记密码？