026 使用大数据对网站基本指标PV案例的分析

2022-09-24 18:16:52

案例：

　　使用电商网站的用户行为日志进行统计分析

一：准备

1.指标

　　PV:网页浏览量

　　UV:独立访客数

　　VV:访客的访问数，session次数

　　IP:独立的IP数

2.上传测试数据

3.查看第一条记录

　　注意点（字符显示）：

二：程序

1.分析

　　省份ID-》key

　　value-》1

　　-》 <provinceId,list(1,1,1)>

2.数据类型

　　key：Text

　　value：IntWritable

3.map 端的业务

4.reduce端的业务

5.整合运行

6.结果

三：计数器

1.程序

2.结果

　　结果完全吻合。

四：完整程序

1.PV程序

 package com.senior.network;

 import java.io.IOException;

 import org.apache.commons.lang.StringUtils;

 import org.apache.hadoop.conf.Configuration;

 import org.apache.hadoop.conf.Configured;

 import org.apache.hadoop.fs.Path;

 import org.apache.hadoop.io.IntWritable;

 import org.apache.hadoop.io.LongWritable;

 import org.apache.hadoop.io.Text;

 import org.apache.hadoop.mapreduce.Job;

 import org.apache.hadoop.mapreduce.Mapper;

 import org.apache.hadoop.mapreduce.Mapper.Context;

 import org.apache.hadoop.mapreduce.Reducer;

 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

 import org.apache.hadoop.util.Tool;

 import org.apache.hadoop.util.ToolRunner;

 /**
  * MapReduce job that counts page views (PV) per province id.
  *
  * <p>Each input line is a tab-separated access-log record. The mapper emits
  * {@code <provinceId, 1>} for every usable record and the reducer sums the
  * ones per province id.
  *
  * <p>Usage: {@code WebPvCount [generic options] [inputPath [outputPath]]}.
  * Paths that are not supplied fall back to the built-in defaults below.
  */
 public class WebPvCount extends Configured implements Tool{

     /** Input directory used when no input path is given on the command line. */
     private static final String DEFAULT_INPUT_PATH=
             "hdfs://linux-hadoop01.ibeifeng.com:8020/user/beifeng/mapreduce/wordcount/inputWebData";

     /** Output directory used when no output path is given on the command line. */
     private static final String DEFAULT_OUTPUT_PATH=
             "hdfs://linux-hadoop01.ibeifeng.com:8020/user/beifeng/mapreduce/wordcount/outputWebData1";

     //Mapper

     /**
      * Emits {@code <provinceId, 1>} for every record that has at least
      * {@link #MIN_FIELD_COUNT} fields, a non-blank URL and a non-blank province id.
      */
     public static class WebPvCountMapper extends Mapper<LongWritable,Text,IntWritable,IntWritable>{

         /** Records with fewer tab-separated fields than this are skipped. */
         private static final int MIN_FIELD_COUNT=30;

         /** Index of the URL field in a split record. */
         private static final int URL_INDEX=1;

         /** Index of the province id field in a split record. */
         private static final int PROVINCE_ID_INDEX=23;

         /** Constant count emitted for each page view. */
         private static final IntWritable mapoutputvalue=new IntWritable(1);

         /** Reused output key, overwritten for every emitted record. */
         private final IntWritable mapoutputkey=new IntWritable();

         /**
          * Parses one log line and emits its province id with a count of one.
          *
          * @param key     offset of the line in the input split (unused)
          * @param value   one tab-separated log record
          * @param context task context used to emit the output pair
          * @throws IOException          if writing the output pair fails
          * @throws InterruptedException if the task is interrupted while writing
          */
         @Override
         protected void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException {
             String[] fields=value.toString().split("\t");
             if(fields.length<MIN_FIELD_COUNT){
                 return;
             }

             String provinceIdValue=fields[PROVINCE_ID_INDEX];
             String urlValue=fields[URL_INDEX];
             if(StringUtils.isBlank(provinceIdValue)||StringUtils.isBlank(urlValue)){
                 return;
             }

             // Records whose province id is not a valid integer are still counted,
             // grouped under the sentinel key Integer.MAX_VALUE.
             int provinceId=Integer.MAX_VALUE;
             try{
                 provinceId=Integer.parseInt(provinceIdValue);
             }catch(NumberFormatException e){
                 System.err.println("Unparseable province id '"+provinceIdValue+"': "+e.getMessage());
             }

             mapoutputkey.set(provinceId);
             context.write(mapoutputkey, mapoutputvalue);
         }
     }

     //Reducer

     /** Sums the per-record counts of one province id into its page-view total. */
     public static class WebPvCountReducer extends Reducer<IntWritable,IntWritable,IntWritable,IntWritable>{

         /** Reused output value holding the total for the current key. */
         private final IntWritable outputvalue=new IntWritable();

         /**
          * Adds up all counts received for one province id and emits the total.
          *
          * @param key     province id
          * @param values  counts emitted by the mappers for this province id
          * @param context task context used to emit the output pair
          * @throws IOException          if writing the output pair fails
          * @throws InterruptedException if the task is interrupted while writing
          */
         @Override
         protected void reduce(IntWritable key, Iterable<IntWritable> values,Context context)throws IOException, InterruptedException {
             int sum=0;
             for(IntWritable value : values){
                 sum+=value.get();
             }
             outputvalue.set(sum);
             context.write(key, outputvalue);
         }
     }

     //Driver

     /**
      * Configures and submits the job, then waits for it to finish.
      *
      * @param args optional {@code [inputPath [outputPath]]}; a missing path falls
      *             back to {@link #DEFAULT_INPUT_PATH} / {@link #DEFAULT_OUTPUT_PATH}
      * @return 0 if the job succeeded, 1 otherwise
      * @throws Exception if the job cannot be created or submitted
      */
     @Override
     public int run(String[] args)throws Exception{
         Configuration conf=this.getConf();
         Job job=Job.getInstance(conf,this.getClass().getSimpleName());
         job.setJarByClass(WebPvCount.class);

         //input
         Path inpath=new Path(args.length>0?args[0]:DEFAULT_INPUT_PATH);
         FileInputFormat.addInputPath(job, inpath);

         //output
         Path outpath=new Path(args.length>1?args[1]:DEFAULT_OUTPUT_PATH);
         FileOutputFormat.setOutputPath(job, outpath);

         //map
         job.setMapperClass(WebPvCountMapper.class);
         job.setMapOutputKeyClass(IntWritable.class);
         job.setMapOutputValueClass(IntWritable.class);

         //shuffle

         //reduce
         job.setReducerClass(WebPvCountReducer.class);
         job.setOutputKeyClass(IntWritable.class);
         job.setOutputValueClass(IntWritable.class);

         //submit
         boolean isSucess=job.waitForCompletion(true);
         return isSucess?0:1;
     }

     //main

     /**
      * Entry point: enables Snappy compression of the map output and runs the job.
      *
      * @param args optional generic Hadoop options followed by {@code [inputPath [outputPath]]}
      * @throws Exception if the job fails to run
      */
     public static void main(String[] args)throws Exception{
         Configuration conf=new Configuration();

         //compress
         conf.set("mapreduce.map.output.compress", "true");
         conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");

         // The configuration must be handed to ToolRunner explicitly; otherwise the
         // tool runs with a fresh Configuration and the settings above are ignored.
         int status=ToolRunner.run(conf, new WebPvCount(), args);
         System.exit(status);
     }

 }

2.计数器

　　这个计数器集中在mapper端。

 package com.senior.network;

 import java.io.IOException;

 import org.apache.commons.lang.StringUtils;

 import org.apache.hadoop.conf.Configuration;

 import org.apache.hadoop.conf.Configured;

 import org.apache.hadoop.fs.Path;

 import org.apache.hadoop.io.IntWritable;

 import org.apache.hadoop.io.LongWritable;

 import org.apache.hadoop.io.Text;

 import org.apache.hadoop.mapreduce.Job;

 import org.apache.hadoop.mapreduce.Mapper;

 import org.apache.hadoop.mapreduce.Mapper.Context;

 import org.apache.hadoop.mapreduce.Reducer;

 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

 import org.apache.hadoop.util.Tool;

 import org.apache.hadoop.util.ToolRunner;

 /**
  * MapReduce job that counts page views (PV) per province id and reports,
  * through mapper-side counters, how many records were skipped or malformed.
  *
  * <p>Each input line is a tab-separated access-log record. The mapper emits
  * {@code <provinceId, 1>} for every usable record and the reducer sums the
  * ones per province id.
  *
  * <p>Usage: {@code WebPvCount [generic options] [inputPath [outputPath]]}.
  * Paths that are not supplied fall back to the built-in defaults below.
  */
 public class WebPvCount extends Configured implements Tool{

     /** Input directory used when no input path is given on the command line. */
     private static final String DEFAULT_INPUT_PATH=
             "hdfs://linux-hadoop01.ibeifeng.com:8020/user/beifeng/mapreduce/wordcount/inputWebData";

     /** Output directory used when no output path is given on the command line. */
     private static final String DEFAULT_OUTPUT_PATH=
             "hdfs://linux-hadoop01.ibeifeng.com:8020/user/beifeng/mapreduce/wordcount/outputWebData2";

     //Mapper

     /**
      * Emits {@code <provinceId, 1>} for every record that has at least
      * {@link #MIN_FIELD_COUNT} fields, a non-blank URL and a non-blank province id.
      * Every rejected or malformed record increments a counter in
      * {@link #COUNTER_GROUP}.
      */
     public static class WebPvCountMapper extends Mapper<LongWritable,Text,IntWritable,IntWritable>{

         /** Counter group under which all mapper counters are reported. */
         private static final String COUNTER_GROUP="webPvMapper_counter";

         /** Records with fewer tab-separated fields than this are skipped. */
         private static final int MIN_FIELD_COUNT=30;

         /** Index of the URL field in a split record. */
         private static final int URL_INDEX=1;

         /** Index of the province id field in a split record. */
         private static final int PROVINCE_ID_INDEX=23;

         /** Constant count emitted for each page view. */
         private static final IntWritable mapoutputvalue=new IntWritable(1);

         /** Reused output key, overwritten for every emitted record. */
         private final IntWritable mapoutputkey=new IntWritable();

         /**
          * Parses one log line and emits its province id with a count of one.
          *
          * @param key     offset of the line in the input split (unused)
          * @param value   one tab-separated log record
          * @param context task context used for counters and to emit the output pair
          * @throws IOException          if writing the output pair fails
          * @throws InterruptedException if the task is interrupted while writing
          */
         @Override
         protected void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException {
             String[] fields=value.toString().split("\t");
             if(fields.length<MIN_FIELD_COUNT){
                 context.getCounter(COUNTER_GROUP, "length_LT_30").increment(1L);
                 return;
             }

             String provinceIdValue=fields[PROVINCE_ID_INDEX];
             String urlValue=fields[URL_INDEX];
             if(StringUtils.isBlank(provinceIdValue)){
                 context.getCounter(COUNTER_GROUP, "priviceIdValue_null").increment(1L);
                 return;
             }
             if(StringUtils.isBlank(urlValue)){
                 context.getCounter(COUNTER_GROUP, "url_null").increment(1L);
                 return;
             }

             // Records whose province id is not a valid integer are still counted,
             // grouped under the sentinel key Integer.MAX_VALUE.
             int provinceId=Integer.MAX_VALUE;
             try{
                 provinceId=Integer.parseInt(provinceIdValue);
             }catch(NumberFormatException e){
                 context.getCounter(COUNTER_GROUP, "switch_fail").increment(1L);
                 System.err.println("Unparseable province id '"+provinceIdValue+"': "+e.getMessage());
             }

             mapoutputkey.set(provinceId);
             context.write(mapoutputkey, mapoutputvalue);
         }
     }

     //Reducer

     /** Sums the per-record counts of one province id into its page-view total. */
     public static class WebPvCountReducer extends Reducer<IntWritable,IntWritable,IntWritable,IntWritable>{

         /** Reused output value holding the total for the current key. */
         private final IntWritable outputvalue=new IntWritable();

         /**
          * Adds up all counts received for one province id and emits the total.
          *
          * @param key     province id
          * @param values  counts emitted by the mappers for this province id
          * @param context task context used to emit the output pair
          * @throws IOException          if writing the output pair fails
          * @throws InterruptedException if the task is interrupted while writing
          */
         @Override
         protected void reduce(IntWritable key, Iterable<IntWritable> values,Context context)throws IOException, InterruptedException {
             int sum=0;
             for(IntWritable value : values){
                 sum+=value.get();
             }
             outputvalue.set(sum);
             context.write(key, outputvalue);
         }
     }

     //Driver

     /**
      * Configures and submits the job, then waits for it to finish.
      *
      * @param args optional {@code [inputPath [outputPath]]}; a missing path falls
      *             back to {@link #DEFAULT_INPUT_PATH} / {@link #DEFAULT_OUTPUT_PATH}
      * @return 0 if the job succeeded, 1 otherwise
      * @throws Exception if the job cannot be created or submitted
      */
     @Override
     public int run(String[] args)throws Exception{
         Configuration conf=this.getConf();
         Job job=Job.getInstance(conf,this.getClass().getSimpleName());
         job.setJarByClass(WebPvCount.class);

         //input
         Path inpath=new Path(args.length>0?args[0]:DEFAULT_INPUT_PATH);
         FileInputFormat.addInputPath(job, inpath);

         //output
         Path outpath=new Path(args.length>1?args[1]:DEFAULT_OUTPUT_PATH);
         FileOutputFormat.setOutputPath(job, outpath);

         //map
         job.setMapperClass(WebPvCountMapper.class);
         job.setMapOutputKeyClass(IntWritable.class);
         job.setMapOutputValueClass(IntWritable.class);

         //shuffle

         //reduce
         job.setReducerClass(WebPvCountReducer.class);
         job.setOutputKeyClass(IntWritable.class);
         job.setOutputValueClass(IntWritable.class);

         //submit
         boolean isSucess=job.waitForCompletion(true);
         return isSucess?0:1;
     }

     //main

     /**
      * Entry point: enables Snappy compression of the map output and runs the job.
      *
      * @param args optional generic Hadoop options followed by {@code [inputPath [outputPath]]}
      * @throws Exception if the job fails to run
      */
     public static void main(String[] args)throws Exception{
         Configuration conf=new Configuration();

         //compress
         conf.set("mapreduce.map.output.compress", "true");
         conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");

         // The configuration must be handed to ToolRunner explicitly; otherwise the
         // tool runs with a fresh Configuration and the settings above are ignored.
         int status=ToolRunner.run(conf, new WebPvCount(), args);
         System.exit(status);
     }

 }

码农公寓

相关文章