MTJoinHadoop多表连接.docx

上传人:b****7 文档编号:23798220 上传时间:2023-05-20 格式:DOCX 页数:13 大小:16.85KB
下载 相关 举报
MTJoinHadoop多表连接.docx_第1页
第1页 / 共13页
MTJoinHadoop多表连接.docx_第2页
第2页 / 共13页
MTJoinHadoop多表连接.docx_第3页
第3页 / 共13页
MTJoinHadoop多表连接.docx_第4页
第4页 / 共13页
MTJoinHadoop多表连接.docx_第5页
第5页 / 共13页
点击查看更多>>
下载资源
资源描述

MTJoinHadoop多表连接.docx

《MTJoinHadoop多表连接.docx》由会员分享,可在线阅读,更多相关《MTJoinHadoop多表连接.docx(13页珍藏版)》请在冰豆网上搜索。

MTJoinHadoop多表连接.docx

MTJoinHadoop多表连接

import java.io.IOException;

import java.util.*;

 

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.util.GenericOptionsParser;

 

public class MTjoin{

 

    public static int time =0;

 

    /*

    * 在map中先区分输入行属于左表还是右表,然后对两列值进行分割,

    * 保存连接列在key值,剩余列和左右表标志在value中,最后输出

    */

    public static class Map extends Mapper{

 

        // 实现map函数

        public void map(Objectkey,Textvalue,Contextcontext)

                throws IOException,InterruptedException{

           Stringline=value.toString();// 每行文件

           Stringrelationtype= new String();// 左右表标识

 

            // 输入文件首行,不处理

            if (line.contains("factoryname")== true

                   ||line.contains("addressed")== true){

                return;

           }

 

            // 输入的一行预处理文本

           StringTokenizeritr= new StringTokenizer(line);

           Stringmapkey= new String();

           Stringmapvalue= new String();

            int i=0;

            while (itr.hasMoreTokens()){

                // 先读取一个单词

               Stringtoken=itr.nextToken();

                // 判断该地址ID就把存到"values[0]"

                if (token.charAt(0)>= '0' &&token.charAt(0)<= '9'){

                   mapkey=token;

                    if (i>0){

                       relationtype= "1";

                   } else {

                       relationtype= "2";

                   }

                    continue;

               }

 

                // 存工厂名

               mapvalue+=token+ "";

               i++;

           }

 

            // 输出左右表

           context.write(new Text(mapkey), new Text(relationtype+ "+"+mapvalue));

       }

   }

 

    /*

    *reduce解析map输出,将value中数据按照左右表分别保存,

  * 然后求出笛卡尔积,并输出。

    */

    public static class Reduce extends Reducer{

 

        // 实现reduce函数

        public void reduce(Textkey,Iterablevalues,Contextcontext)

                throws IOException,InterruptedException{

 

            // 输出表头

            if (0== time){

                context.write(new Text("factoryname"), new Text("addressname"));

                time++;

           }

 

            int factorynum=0;

           String[]factory= new String[10];

            int addressnum=0;

           String[] address = new String[10];

 

            Iterator ite=values.iterator();

            while (ite.hasNext()){

               Stringrecord=ite.next().toString();

                int len=record.length();

                int i=2;

                if (0==len){

                    continue;

               }

 

                // 取得左右表标识

                char relationtype=record.charAt(0);

 

                // 左表

                if ('1' ==relationtype){

                   factory[factorynum]=record.substring(i);

                   factorynum++;

               }

 

                // 右表

                if ('2' ==relationtype){

                    address[addressnum]=record.substring(i);

                   addressnum++;

               }

           }

 

            // 求笛卡尔积

            if (0!

=factorynum&&0!

=addressnum){

                for (int m=0;m

                    for (int n=0;n

                        // 输出结果

                       context.write(new Text(factory[m]),

                                new Text(address[n]));

                   }

               }

           }

 

       }

   }

 

    public static void main(String[]args) throws Exception{

       Configurationconf= new Configuration();

        // 这句话很关键

       conf.set("mapred.job.tracker", "192.168.1.2:

9001");

 

       String[]ioArgs= new String[]{ "MTjoin_in", "MTjoin_out" };

       String[]otherArgs= new GenericOptionsParser(conf,ioArgs).getRemainingArgs();

        if (otherArgs.length !

=2){

           System.err.println("Usage:

MultipleTableJoin");

           System.exit

(2);

       }

 

       Jobjob= new Job(conf, "MultipleTableJoin");

       job.setJarByClass(MTjoin.class);

 

        // 设置Map和Reduce处理类

       job.setMapperClass(Map.class);

       job.setReducerClass(Reduce.class);

 

        // 设置输出类型

       job.setOutputKeyClass(Text.class);

       job.setOutputValueClass(Text.class);

 

        // 设置输入和输出目录

       FileInputFormat.addInputPath(job, new Path(otherArgs[0]));

       FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

       System.exit(job.waitForCompletion(true)?

0:

1);

   }

}

importjava.io.IOException;

importjava.util.*;

 

importorg.apache.Hadoop.conf.Configuration;

importorg.apache.hadoop.fs.Path;

importorg.apache.hadoop.io.IntWritable;

importorg.apache.hadoop.io.Text;

importorg.apache.hadoop.mapreduce.Job;

importorg.apache.hadoop.mapreduce.Mapper;

importorg.apache.hadoop.mapreduce.Reducer;

importorg.apache.hadoop.mapreduce.lib.input.FileInputFormat;

importorg.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

importorg.apache.hadoop.util.GenericOptionsParser;

 

publicclassMTjoin{

 

  publicstaticinttime=0;

 

  /*

  *在map中先区分输入行属于左表还是右表,然后对两列值进行分割,

  *保存连接列在key值,剩余列和左右表标志在value中,最后输出

  */

  publicstaticclassMapextendsMapper{

 

    //实现map函数

    publicvoidmap(Objectkey,Textvalue,Contextcontext)

        throwsIOException,InterruptedException{

      Stringline=value.toString();//每行文件

      Stringrelationtype=newString();//左右表标识

 

      //输入文件首行,不处理

      if(line.contains("factoryname")==true

          ||line.contains("addressed")==true){

        return;

      }

 

      //输入的一行预处理文本

      StringTokenizeritr=newStringTokenizer(line);

      Stringmapkey=newString();

      Stringmapvalue=newString();

      inti=0;

      while(itr.hasMoreTokens()){

        //先读取一个单词

        Stringtoken=itr.nextToken();

        //判断该地址ID就把存到"values[0]"

        if(token.charAt(0)>='0'&&token.charAt(0)<='9'){

          mapkey=token;

          if(i>0){

            relationtype="1";

          }else{

            relationtype="2";

          }

          continue;

        }

 

        //存工厂名

        mapvalue+=token+"";

        i++;

      }

 

      //输出左右表

      context.write(newText(mapkey),newText(relationtype+"+"+mapvalue));

    }

  }

 

  /*

  *reduce解析map输出,将value中数据按照左右表分别保存,

  *然后求出笛卡尔积,并输出。

  */

  publicstaticclassReduceextendsReducer{

 

    //实现reduce函数

    publicvoidreduce(Textkey,Iterablevalues,Contextcontext)

        throwsIOException,InterruptedException{

 

      //输出表头

      if(0==time){

        context.write(newText("factoryname"),newText("addressname"));

        time++;

      }

 

      intfactorynum=0;

      String[]factory=newString[10];

      intaddressnum=0;

      String[]address=newString[10];

 

      Iteratorite=values.iterator();

      while(ite.hasNext()){

        Stringrecord=ite.next().toString();

        intlen=record.length();

        inti=2;

        if(0==len){

          continue;

        }

 

        //取得左右表标识

        charrelationtype=record.charAt(0);

 

        //左表

        if('1'==relationtype){

          factory[factorynum]=record.substring(i);

          factorynum++;

        }

 

        //右表

        if('2'==relationtype){

          address[addressnum]=record.substring(i);

          addressnum++;

        }

      }

 

      //求笛卡尔积

      if(0!

=factorynum&&0!

=addressnum){

        for(intm=0;m

          for(intn=0;n

            //输出结果

            context.write(newText(factory[m]),

                newText(address[n]));

          }

        }

      }

 

    }

  }

 

  publicstaticvoidmain(String[]args)throwsException{

    Configurationconf=newConfiguration();

    //这句话很关键

 //   conf.set("mapred.job.tracker","192.168.1.2:

9001");

 

 //可使用args

 //   String[]ioArgs=newString[]{"MTjoin_in","MTjoin_out"};

    String[]otherArgs=newGenericOptionsParser(conf,args).getRemainingArgs();

    if(otherArgs.length!

=2){

      System.err.println("Usage:

MultipleTableJoin");

      System.exit

(2);

    }

 

    Jobjob=newJob(conf,"MultipleTableJoin");

    job.setJarByClass(MTjoin.class);

 

    //设置Map和Reduce处理类

    job.setMapperClass(Map.class);

    job.setReducerClass(Reduce.class);

 

    //设置输出类型

    job.setOutputKeyClass(Text.class);

    job.setOutputValueClass(Text.class);

 

    //设置输入和输出目录

    FileInputFormat.addInputPa

展开阅读全文
相关资源
猜你喜欢
相关搜索

当前位置:首页 > 人文社科 > 军事政治

copyright@ 2008-2022 冰豆网网站版权所有

经营许可证编号:鄂ICP备2022015515号-1