# 给大厨写的R数据分析代码 (R data-analysis code written for 大厨)

# 2024-03-24 18:12:58
###************************************** 新老客户统计 ***************************************###

# Load the customer order file: the first column is read as character and the
# second as Date (see colClasses).
dachu <- read.csv(
  "D:\\Dasktop\\bigdata_game\\天池\\大厨\\qijiandiankehu.csv",
  header = TRUE,
  encoding = "utf-8",
  colClasses = c("character", "Date")
)

str(dachu)

head(dachu, 20)

# Number of order rows per buyer nickname.
temp <- table(dachu$买家昵称)

# Share of buyers at each order count.
plot(table(sort(temp)) / length(temp))

#library(data.table)

#month(dachu$下单日期[nrow(dachu)])

# Date range covered by the data.
min(dachu$下单日期)

max(dachu$下单日期)

# "YYYY-MM" month key for every order row.
dachu$ym <- substr(dachu$下单日期, 1, 7)
head(dachu)

# Monthly new / old customer counts.
#
# A buyer is "new" in a month when they have exactly one order row in that
# month and no order row in any earlier month.
# A buyer is "old" in a month when they have two or more order rows in that
# month, or exactly one order row plus at least one order in an earlier month.
#
# Months are matched through the dachu$ym key ("YYYY-MM") instead of
# first-of-next-month date bounds, so the first month, the last month and a
# data set that contains only a single month all go through the same code.
ss <- sort(unique(dachu$ym))

newcusts <- integer(length(ss))
oldcusts <- integer(length(ss))

# Buyers seen in any month processed so far.
uniq <- character(0)

for (i in seq_along(ss)) {
  # which() drops rows whose month key is NA.
  now <- dachu$买家昵称[which(dachu$ym == ss[i])]
  temp <- table(now)
  buyers <- names(temp)

  # Buyers with exactly one order row this month ...
  new_now <- buyers[temp == 1]
  # ... of which these have never bought before.
  first_timers <- sum(!(new_now %in% uniq))

  newcusts[i] <- first_timers
  # Every other buyer of this month counts as an old customer.
  oldcusts[i] <- length(buyers) - first_timers

  uniq <- union(uniq, buyers)
}

# Show the monthly counts.
newcusts

oldcusts

# Pair every count with its month; the outer parentheses also print the
# resulting matrix.
(newcusts1 <- cbind(date = ss, newcusts))

(oldcusts1 <- cbind(date = ss, oldcusts))

# Export both tables without quoting.
write.csv(
  newcusts1,
  "C:\\Users\\hasee\\Desktop\\newcusts.csv",
  quote = FALSE
)

write.csv(
  oldcusts1,
  "C:\\Users\\hasee\\Desktop\\oldcusts.csv",
  quote = FALSE
)

#library(timeSeries)

# Line chart of total, new and old customer counts per month.
win.graph()

# Remember the current graphics parameters so they can be restored below.
opar <- par(no.readonly = TRUE)

par(lty = 1, pch = 1)  # par("cex") shows the default value

# plot.ts(ts(newcusts+oldcusts, start = c(2014, 3), frequency = 12),main="薏凡特月度新老客户购买数量变化趋势", col=1)

# lines(ts(newcusts, start = c(2014, 3), frequency = 12), col=2)

# lines(ts(oldcusts, start = c(2014, 3), frequency = 12), col=3)

# X axis: first day of every month in ss. Deriving it from ss (instead of a
# hard-coded 2014-03 start) keeps the axis aligned with the counts whatever
# period the data covers, including when a month is missing.
time <- as.Date(paste0(ss, "-01"))

plot(time, newcusts + oldcusts, xlab = "月份", ylab = "客户数",
     main = "薏凡特月度新老客户购买数量变化趋势",
     type = "o", col = 1)

# type: "p" points, "l" lines, "b" both, "c" empty points joined by lines,
# "o" overplotted points and lines, "s"/"S" stair steps, "h" histogram-like
# vertical lines, "n" nothing.
# pch: point symbol
# cex: point size
# lty: line type, 0=blank, 1=solid (default), 2=dashed, 3=dotted, 4=dotdash,
#      5=longdash, 6=twodash
# lwd: line width

lines(time, newcusts, type = "o", col = 2)

lines(time, oldcusts, type = "o", col = 3)

legend("topright", c("总体客户", "新客户", "老客户"), col = 1:3, lty = 1, pch = 1)

# Legend positions: "bottomright", "bottom", "bottomleft", "left", "topleft",
# "top", "topright", "right", "center"

par(opar)

#par(new=TRUE)

###************************************** 当月回购率 ***************************************###

# 月初统计购买一次的客户数，月末统计这部分人回购人数。

# 当月新进的客户且购买2次以上的不计入新客户

# 新客户可直接table=1的sum，但是当月回购的客户如何计算是难点。(可以unique内连接计数)

#数据导入

# Monthly repurchase rate.
#
# For every month from the second one on:
#   counts     = buyers with exactly one order row in the preceding window
#                (up to the 12 previous months that appear in the data)
#   repurchase = how many of those buyers ordered again in the month itself
#   rate       = repurchase / counts

# Data import
dachu <- read.csv("D:\\Dasktop\\bigdata_game\\天池\\大厨\\qijiandiankehu.csv", header = TRUE, encoding = "utf-8", colClasses = c("character", "Date"))

# NOTE(review): this section reads the order date from 下单时间, while the
# new/old customer section reads 下单日期 from the same file path. Confirm
# which column name the CSV really has.

str(dachu)

min(dachu$下单时间)

max(dachu$下单时间)

# "YYYY-MM" key per order row, and the sorted list of months present.
order_month <- substr(dachu$下单时间, 1, 7)
ss <- sort(unique(order_month))

# The first month has no history, so scoring starts at the second month.
# seq_along() keeps this empty (instead of counting down) for short data.
months_to_score <- seq_along(ss)[-1]
counts <- integer(length(months_to_score))
repurchase <- integer(length(months_to_score))

for (j in seq_along(months_to_score)) {
  i <- months_to_score[j]

  # Purchase history before the month: at most the 12 preceding data months.
  lookback <- ss[max(i - 12, 1):(i - 1)]
  temp <- table(dachu$买家昵称[order_month %in% lookback])
  once_before <- names(temp)[temp == 1]

  # Buyers who ordered within the month.
  temp2 <- unique(dachu$买家昵称[which(order_month == ss[i])])

  counts[j] <- length(once_before)
  repurchase[j] <- sum(once_before %in% temp2)
}

# Month, one-time buyers at month start, repurchasers within the month.
new_customer <- data.frame(
  date = ss[months_to_score],
  counts = counts,
  repurchase = repurchase,
  stringsAsFactors = FALSE
)

# Repurchase rate
new_customer$rate <- new_customer$repurchase / new_customer$counts

#colnames(new_customer) = c('date','counts','repurchase','rate')

if (nrow(new_customer) > 0) {
  win.graph()

  opar <- par(mfrow = c(2, 2))

  # plot() cannot use a character x axis, so convert "YYYY-MM" to a Date.
  plot_month <- as.Date(paste0(new_customer$date, "-01"))

  plot(plot_month, new_customer$counts, xlab = "date", ylab = "counts")

  plot(plot_month, new_customer$repurchase, xlab = "date", ylab = "repurchase")

  plot(plot_month, new_customer$rate, xlab = "date", ylab = "rate")

  par(opar)
}

write.csv(new_customer, "C:\\Users\\hasee\\Desktop\\new_customer.csv")

###****************************************** 季度转化率 ****************************************###

#数据导入

# Quarterly conversion rate.
#
# For every month i from the second one up to the third-from-last:
#   counts     = buyers with exactly one order row anywhere before month i
#   repurchase = how many of those buyers ordered in the three data months
#                i, i+1, i+2
#   rate       = repurchase / counts

# Data import
dachu <- read.csv("C:\\Users\\hasee\\Desktop\\qijiandiankehu.csv", header = TRUE, encoding = "utf-8", colClasses = c("character", "Date"))

str(dachu)

min(dachu$下单时间)

max(dachu$下单时间)

# "YYYY-MM" key per order row, and the sorted list of months present.
order_month <- substr(dachu$下单时间, 1, 7)
ss <- sort(unique(order_month))

# Months that still have a full three-month window after them, skipping the
# first month (no history). seq_len() keeps this empty when fewer than three
# months are available, where seq() would count downwards.
months_to_score <- seq_len(max(length(ss) - 2, 0))[-1]
counts <- integer(length(months_to_score))
repurchase <- integer(length(months_to_score))

for (j in seq_along(months_to_score)) {
  i <- months_to_score[j]

  # Purchase history before the start of the quarter.
  temp <- table(dachu$买家昵称[order_month %in% ss[seq_len(i - 1)]])
  once_before <- names(temp)[temp == 1]

  # Buyers who ordered within the quarter (months i to i + 2).
  temp2 <- unique(dachu$买家昵称[order_month %in% ss[i:(i + 2)]])

  counts[j] <- length(once_before)
  repurchase[j] <- sum(once_before %in% temp2)
}

# Month, one-time buyers at quarter start, repurchasers within the quarter.
new_customer <- data.frame(
  date = ss[months_to_score],
  counts = counts,
  repurchase = repurchase,
  stringsAsFactors = FALSE
)

# Repurchase rate
new_customer$rate <- new_customer$repurchase / new_customer$counts

#colnames(new_customer) = c('date','counts','repurchase','rate')

if (nrow(new_customer) > 0) {
  win.graph()

  opar <- par(mfrow = c(2, 2))

  # plot() cannot use a character x axis, so convert "YYYY-MM" to a Date.
  plot_month <- as.Date(paste0(new_customer$date, "-01"))

  plot(plot_month, new_customer$counts, xlab = "date", ylab = "counts")

  plot(plot_month, new_customer$repurchase, xlab = "date", ylab = "repurchase")

  plot(plot_month, new_customer$rate, xlab = "date", ylab = "rate")

  par(opar)
}

# NOTE(review): this is the same output path the monthly repurchase section
# writes to, so running both overwrites the monthly result.
write.csv(new_customer, "C:\\Users\\hasee\\Desktop\\new_customer.csv")

###************************************ 客户连带率：该段代码貌似有问题 ***********************************###

# 只针对所有一次客户

# 月连带率=本月发生连带的客户数/本月成交总客户数

# 产品连带率=购买该产品连带的客户数/购买该产品总体客户数

# 成交总客户=1次多件客户+一次一件客户

#数据导入

library(readxl)

# dachu <- read.csv("C:\\Users\\hasee\\Desktop\\liandailv.xlsx", header = T, encoding = "utf-8", colClasses = c("character", "Date", "character"))

# read_excel(path, sheet = 1, col_names = TRUE, col_types = NULL, na = "", skip = 0)

# Read the first sheet of the workbook with all three columns as text.
dachu <- read_excel(
  "C:\\Users\\hasee\\Desktop\\liandailv.xlsx",
  sheet = 1,
  col_names = TRUE,
  col_types = c("text", "text", "text"),
  na = "",
  skip = 0
)

# Convert the order-date text to Date.
dachu$下单日期 <- as.Date(dachu$下单日期)

str(dachu)

unique(dachu$商品ID)

# Date range of the data.
min(dachu$下单日期)

max(dachu$下单日期)

# Sorted "YYYY-MM" months present in the data.
order_months <- substr(dachu$下单日期, 1, 7)
month_set <- sort(unique(order_months))

#月度连带率

# Monthly association rate: for every month in month_set,
#   count  = number of distinct buyers with an order row in the month
#   count2 = number of those buyers with more than one order row
#   rate   = count2 / count
#
# Rows are assigned to months through their "YYYY-MM" key, so a missing
# order date only drops that row instead of turning the running date bound
# (previously min() of all dates) into NA.
order_month <- substr(dachu$下单日期, 1, 7)

buyers_by_month <- lapply(month_set, function(mon) {
  table(dachu$买家昵称[which(order_month == mon)])
})

count <- vapply(buyers_by_month, length, integer(1))
count2 <- vapply(buyers_by_month, function(tab) sum(tab > 1), integer(1))

month_associate_rate <- data.frame(
  month = month_set,
  count = count,
  count2 = count2,
  rate = count2 / count,
  stringsAsFactors = FALSE
)

month_associate_rate

#产品连带率

# Product association rate, step 1: mark every row of a buyer who has more
# than one row in the data with flag = 1 (all other rows keep flag = 0).
dachu$flag <- 0

head(dachu)

temp <- table(dachu$买家昵称)

# library(dplyr)

# temp2 = left_join(dachu, data.frame(x = names(temp)[temp>1], flag.y = 1), by= c("买家昵称" = "x"),suffix = c("", ".y"))

# Lookup table of buyers with more than one row.
repeat_buyers <- data.frame(x = names(temp)[temp > 1], flag.x = 1)

# Left join: rows of other buyers get flag.x = NA.
temp2 <- merge(
  dachu,
  repeat_buyers,
  by.x = "买家昵称",
  by.y = "x",
  all.x = TRUE
)

temp2$flag[temp2$flag.x == 1] <- 1

# The helper column is no longer needed.
temp2$flag.x <- NULL

temp2

#定义保存产品连带率

# Product association rate: for every product ID,
#   count  = rows of temp2 for that product
#   count2 = rows among them whose buyer is flagged (flag == 1)
#   rate   = count2 / count
#
# The per-product work runs inside a function, so no loop variable named
# `pi` is left in the workspace masking the built-in constant.
prod_set <- unique(dachu$商品ID)

flags_by_product <- lapply(prod_set, function(prod_id) {
  temp2$flag[which(temp2$商品ID == prod_id)]
})

count <- vapply(flags_by_product, length, integer(1))
count2 <- vapply(flags_by_product, function(f) sum(f == 1), integer(1))

product_associate_rate <- data.frame(
  product = prod_set,
  count = count,
  count2 = count2,
  rate = count2 / count,
  stringsAsFactors = FALSE
)

# Most frequently bought products first.
product_associate_rate <- product_associate_rate[order(product_associate_rate$count, decreasing = TRUE), ]

product_associate_rate$product <- as.character(product_associate_rate$product)

head(product_associate_rate)

# Check: all rows of the buyers who bought one specific product.
dachu[dachu$买家昵称 %in% dachu[dachu$商品ID == "42303520877", ]$买家昵称, ]

#产品连带率前五月度变化

#temp2为产品连带率里计算的那个

# Monthly association rate of the top products (at most five, taken from the
# first rows of product_associate_rate). temp2 is the flagged table built for
# the product association rate.
#
# For every month and every top product k the result holds
#   top<k>count  = rows of that product in the month
#   top<k>count2 = rows among them with flag == 1
#   top<k>rate   = count2 / count (0 when the product has no rows that month)
prod_set <- head(product_associate_rate$product, 5)
n_top <- length(prod_set)

stat_names <- if (n_top > 0) {
  paste0("top", rep(seq_len(n_top), each = 3), c("count", "count2", "rate"))
} else {
  character(0)
}

order_month <- substr(temp2$下单日期, 1, 7)

stats <- matrix(0, nrow = length(month_set), ncol = 3 * n_top,
                dimnames = list(NULL, stat_names))

for (i in seq_along(month_set)) {
  temp <- temp2[which(order_month == month_set[i]), ]

  for (j in seq_len(n_top)) {
    temp4 <- temp$flag[which(temp$商品ID == prod_set[j])]
    n_rows <- length(temp4)
    n_linked <- sum(temp4 == 1)
    # Columns 3j-2, 3j-1 and 3j hold count, count2 and rate of product j.
    stats[i, 3 * j - 2:0] <- c(
      n_rows,
      n_linked,
      if (n_rows == 0) 0 else n_linked / n_rows
    )
  }
}

product_associate_rate_top5 <- data.frame(
  month = month_set,
  stats,
  stringsAsFactors = FALSE
)

product_associate_rate_top5

#图形展示

# Charts and CSV export of the association rates.
win.graph()

opar <- par(mfrow = c(1, 2))

# plot() needs a numeric or Date x axis; the month column holds "YYYY-MM"
# text, so convert it to the first day of each month.
month_start <- as.Date(paste0(month_associate_rate$month, "-01"))

plot(month_start, month_associate_rate$rate, type = "l", col = "blue", main = "月度连带率", xlab = "月份", ylab = "连带率")

plot(product_associate_rate$rate, main = "产品连带率", xlab = "产品", ylab = "连带率")

par(opar)

write.csv(month_associate_rate, "C:\\Users\\hasee\\Desktop\\month_associate_rate.csv")

write.csv(product_associate_rate, "C:\\Users\\hasee\\Desktop\\product_associate_rate.csv") #, quote = TRUE

write.csv(product_associate_rate_top5, "C:\\Users\\hasee\\Desktop\\product_associate_rate_top5.csv") #, quote = TRUE

# dplyr包包含了各种关联查询的函数，如inner_join,left_join,full_join,rigth_join......

library(dplyr)

library("nycflights13")

# Drop unimportant variables so it's easier to understand the join results.

# Keep only a few columns of flights so the join result is easy to read.
flights2 <- select(flights, year:day, tailnum, carrier)

# Attach the airlines columns to every flight through the carrier code.
left_join(flights2, airlines, by = "carrier")

#merge(data.frame(x=1:3,y=0,z=2),data.frame(x=2:3,y=1:2),by=c("x"),all.x = T)

###******************************************* 回购率与首次消费金额关系 ********************************************###

# Load all customer orders; the three columns are read as character, Date
# and numeric (see colClasses).
dachu <- read.csv(
  "D:\\Dasktop\\bigdata_game\\天池\\大厨\\suoyoukehushuju.csv",
  header = TRUE,
  encoding = "utf-8",
  colClasses = c("character", "Date", "numeric")
)

str(dachu)

head(dachu, 20)

library(dplyr)

temp <- head(dachu, 20)

# First 100 rows after sorting by buyer and, within a buyer, newest order
# first.
temp <- head(arrange(dachu, 买家昵称, desc(下单时间)), 100)
temp

#flights[order(flights$year, flights$month, flights$day), ]

#flights[order(desc(flights$arr_delay)), ]

#filter(group_by(temp, 买家昵称))

# Repurchase rate by amount paid on the first order.

# One row per buyer: their earliest order, plus count = total number of order
# rows of that buyer. ungroup() so the later column assignments work on a
# plain (ungrouped) table.
temp <- dachu %>%
  arrange(买家昵称, 下单时间) %>%
  group_by(买家昵称) %>%
  mutate(count = n()) %>%
  slice(1) %>%
  ungroup()

win.graph()

opar <- par(mfrow = c(1, 2))

# Amount paid vs. number of purchases
plot(temp$实付金额, temp$count)

# Amount paid vs. frequency (number of buyers)
plot(table(temp$实付金额))

par(opar)

# Amount groups chosen from the first chart:
#   below 1000   -> equal-width bins of 100 (groups 0 to 9 for amounts >= 0)
#   1000 to 2000 -> group 10
#   2000 and up  -> group 11
# Computed in one vectorised step, so an empty amount range (for example no
# first order of 2000 or more) cannot cause an assignment error.
amount <- temp$实付金额
temp$group <- ifelse(
  amount < 1000,
  amount %/% 100,
  ifelse(amount < 2000, 10, 11)
)

head(temp, 20)

# Per group: n1 = buyers with more than one order, n2 = all buyers.
temp2 <- temp %>%
  group_by(group) %>%
  summarise(n1 = sum(count > 1), n2 = n(), rate = n1 / n2)

win.graph()

# Repurchase rate per amount group
plot(temp2$group, temp2$rate)

# i <- c("gamma","a")

# switch(i,

#        beta = "You typed beta",

#        alpha = "You typed alpha",

#        gamma = "You typed gamma",

#        delta = "You typed delta"

# )

###******************************************* 客户联带对回购的影响 *******************************************###

# Start of the timing for this section.
t0 <- Sys.time()

# Read the full sub-order export; column types are fixed through colClasses.
sub_order_types <- c(
  rep("character", 4), rep("Date", 3), rep("character", 5),
  "integer", "numeric", "character", rep("numeric", 2)
)
dachu <- read.csv(
  "D:\\Dasktop\\bigdata_game\\天池\\大厨\\AnalysisOrderDownLoad-订单信息-子订单（全量）-10027396-8025-107.csv",
  header = TRUE,
  encoding = "utf-8",
  colClasses = sub_order_types
)

str(dachu)

# Keep only columns 4 and 5.
dachu <- dachu[, 4:5]

head(dachu)

# Drop the first two characters and the last character of every nickname.
# NOTE(review): presumably this removes a wrapper added by the export -
# confirm against the raw file.
nick <- dachu$买家昵称
dachu$买家昵称 <- substr(nick, 3, nchar(nick) - 1)

head(dachu, 20)

library(dplyr)

#首单购买件数回购率

# Repurchase rate by number of rows in the first order.

# One row per buyer and order date; count = rows on that date.
per_order <- dachu %>%
  group_by(买家昵称, 下单时间) %>%
  summarise(count = n())

# Earliest order of every buyer; count2 = number of order dates of the buyer.
first_order <- per_order %>%
  arrange(买家昵称, 下单时间) %>%
  group_by(买家昵称) %>%
  mutate(count2 = n()) %>%
  slice(1)

# Per first-order size: n1 = buyers, n2 = buyers with more than one order
# date, rate = n2 / n1. One row is kept per size.
temp <- first_order %>%
  group_by(count) %>%
  mutate(n1 = n(), n2 = sum(count2 > 1), rate = n2 / n1) %>%
  slice(1) %>%
  select(count, n1, n2, rate)

temp

win.graph()

plot(
  temp$count, temp$rate,
  main = "首单购买件数与回购率",
  xlab = "首单购买件数",
  ylab = "回购客户占比",
  col = "red"
)

#按月计算新客中回购客户占比

# Per month, the share of new customers whose first order has more than one
# row. (The computed quantity is sum(count > 1) / n(), i.e. a multi-item
# first order, which is what the chart labels below describe.)
temp <- dachu %>%
  group_by(买家昵称, 下单时间) %>%
  summarise(count = n()) %>%  # rows per buyer and order date
  mutate(year = as.integer(substr(下单时间, 1, 4)),
         month = as.integer(substr(下单时间, 6, 7))) %>%
  arrange(买家昵称, 下单时间) %>%
  group_by(买家昵称) %>%
  mutate(count2 = n()) %>%  # number of order dates of the buyer
  slice(1) %>%  # first row after the date sort = the buyer's first order
  group_by(year, month) %>%
  mutate(n1 = n(), n2 = sum(count > 1), rate = n2 / n1) %>%
  slice(1) %>%
  select(下单时间, year, month, n1, n2, rate)

temp

win.graph()

# X axis: first day of each row's own year/month. Building it from the data
# (rather than a gap-free monthly sequence) keeps every rate on its actual
# month even when a month has no new customers.
time <- as.Date(sprintf("%04d-%02d-01", temp$year, temp$month))

plot(time, temp$rate, main = "各月新客中连带客户占比", xlab = "月份",
     ylab = "首单购买多件客户占比", type = "l")

#按订单统计连带率(即购买多件订单与总订单之比)

# Order-level association rate: orders (buyer + order date) with more than
# one row, divided by all orders.
temp <- summarise(group_by(dachu, 买家昵称, 下单时间), count = n())

multi_item_orders <- sum(temp$count > 1)
multi_item_orders / nrow(temp)

# Elapsed time since t0.
Sys.time() - t0

###############################################################################################################

#setwd("H:/数据分析/内部数据/薏凡特旗舰店数据/旗舰店客户数据分析/0803")

# Repurchase count and repurchase probability.

# dplyr is loaded before its first use (arrange) so this section also runs
# in a fresh session.
library(dplyr)

setwd("D:\\Dasktop\\bigdata_game\\天池\\大厨")

dat <- read.csv("kehushuju.csv", header = TRUE, encoding = "utf-8", colClasses = c("character", "Date", "integer", "numeric", "integer"), stringsAsFactors = FALSE)

dat <- arrange(dat, 买家昵称, 下单日期)

head(dat)

# new_dat<-unique(dat)  # expensive on large data and duplicates are not expected, so skipped

# head(new_dat)

# Number of order rows per buyer.
temp <- dat %>%
  group_by(买家昵称) %>%
  summarise(count = n())

head(temp)

max_count <- if (nrow(temp) > 0) max(temp$count) else 0L

# orders_hist[k] = buyers with exactly k orders;
# at_least[k]    = buyers with k or more orders.
orders_hist <- tabulate(temp$count, nbins = max_count)
at_least <- rev(cumsum(rev(orders_hist)))

# For i = 1 .. max_count - 1:
#   rr1[i]  = buyers with exactly i + 1 orders
#   rr2[i]  = buyers with at least i orders (never 0 in this range)
#   rate[i] = rr1[i] / rr2[i]
# seq_len() yields no steps when max_count is 0 or 1, where 1:(max_count - 1)
# would run backwards.
steps <- seq_len(max(max_count - 1, 0))
rr1 <- orders_hist[steps + 1]
rr2 <- at_least[steps]
rate <- rr1 / rr2

temp2 <- filter(temp, count >= 2)

head(temp2)

rrr <- cbind(rr1, rr2, rate)

rrr

# write.csv(rrr,"H:/数据分析/内部数据/薏凡特旗舰店数据/旗舰店客户数据分析/0803/rrr.csv")

#计算回购周期#####

#添加购买次数列

# Keep only buyer, order date and the 下单时点 column.
new_dat2 <- dat %>%
  select(买家昵称, 下单日期, 下单时点)

# new_dat2<-data.frame(new_dat2)   #已经是数据框结构，而且即便转换格式此处也不对，应该为：new_dat2<-as.data.frame(new_dat2)

# new_dat2<-unique(new_dat2)

# head(new_dat2)

# temp2<-group_by(new_dat2,买家昵称)

# temp2<-summarise(temp2,count=n())

# temp2 <- new_dat2 %>%

#   group_by(买家昵称) %>%

#   summarise(count=n())

# head(temp2)

# count2<-unique(temp2$count)

#

# new_dat2$counts=0

# for(i in count2){

#   rg<-temp[temp2$count==i,]$买家昵称;

#   new_dat2[new_dat2$买家昵称 %in% rg,]$counts=i

#

# }

# Attach every buyer's total order count (column count of temp) to the
# buyer's rows.
new_dat2 <- merge(new_dat2, temp, by = "买家昵称")

head(new_dat2)

# old_dat<-filter(new_dat2,counts>=2)

# old_dat<-arrange(old_dat,下单日期)

# old_dat <- new_dat2 %>%    ##此处太慢，后面给出改进方法

#   filter(count>=2) %>%

#   arrange(下单日期)

# # old_dat<-unique(old_dat)

# head(old_dat)

# #max_count2<-max(old_dat$counts)

# #num<-c(1:max_count2)

# rebuy<-c()

# redays<-c()

# # t=1

# for(i in unique(old_dat$买家昵称) ){

#   rg<-filter(old_dat,old_dat$买家昵称==i)

#

#   for(j in 1:(rg$count[1]-1))

#   {

#     #t_diff <- rg$下单日期[j+1] - rg$下单日期[j]

#     t_diff <- as.integer(rg$下单日期[j+1] - rg$下单日期[j])

#     # rebuy[t]=j+1

#     # redays[t]=t_diff

#     # t=t+1

#     rebuy = c(rebuy,j+1)

#     redays = c(redays,t_diff)

#   }

# }

#

# head(rebuy)

# head(redays)

# mydata<-data.frame(rebuy,redays)

# #write.csv(mydata,"H:/数据分析/内部数据/薏凡特旗舰店数据/旗舰店客户数据分析/0803/mydata.csv")

# head(mydata)

###各时点回购人数占比

#不考虑时间因素时

# Share of rows belonging to repeat buyers per 下单时点 value, ignoring the
# order in which purchases happened: for every distinct value, the fraction
# of rows whose buyer has more than one order in total (count > 1).
#
# The table is built in one step instead of rbind()-ing bare vectors onto an
# empty data frame, and which() keeps rows with a missing 下单时点 from
# turning every rate into NA.
time_slots <- sort(unique(dat$下单时点))

slot_rate <- vapply(time_slots, function(slot) {
  temp2 <- new_dat2$count[which(new_dat2$下单时点 == slot)]
  sum(temp2 > 1) / length(temp2)
}, numeric(1))

rate <- data.frame(as.numeric(time_slots), slot_rate)

colnames(rate) <- c("下单时点", "rate")

rate

#考虑时间因素时

###如果考虑时间因素，则需加以下代码

# Sort by buyer and order date; the steps that follow rely on this order.
new_dat3 <- new_dat2 %>%
  arrange(买家昵称, 下单日期)

head(new_dat3, 50)

# for(i in temp$买家昵称){  #由于循环较大故运行时间较长

#   new_dat3[new_dat3$买家昵称 == i,]$count <- 1:(temp[temp$买家昵称 == i,]$count)

# }

# head(new_dat3, 50)

#改进后,此方法必须对数据先排序！！

# t0 <- Sys.time()

# i <- 1; nmax <- nrow(new_dat3)

# repeat{

#   #m = i

#   n = new_dat3[i,4]

#   #ss = new_dat3[i,1]

#   # repeat{

#   #   i <- i + 1

#   #   if((new_dat3[i,1] != ss) | (i > nmax)){

#   #     new_dat3[m:(i-1),4] <- 1:new_dat3[m,4]

#   #     break

#   #   }

#   # }

#   new_dat3[i:(i + n - 1),4] <- 1:n

#   i = i+n

#   if(i > nmax)  break

# }

# Sys.time()-t0

#

# t0 <- Sys.time()

# i <- 1; nmax <- nrow(new_dat3)

# while(i <= nmax){

#   #m = i

#   n = new_dat3[i,4]

#   #ss = new_dat3[i,1]

#   # repeat{

#   #   i <- i + 1

#   #   if((new_dat3[i,1] != ss) | (i > nmax)){

#   #     new_dat3[m:(i-1),4] <- 1:new_dat3[m,4]

#   #     break

#   #   }

#   # }

#   new_dat3[i:(i + n - 1),4] <- 1:n

#   i = i+n

# }

# Sys.time()-t0

# Replace each buyer's total order count with the running purchase number
# (1 for the first row of the buyer, 2 for the second, ...).
#
# new_dat3 must already be sorted by buyer and order date. ave() numbers the
# rows within each buyer directly, so the result does not depend on the
# column position of count, on processing the totals in ascending order, or
# on each buyer's rows being contiguous.
t0 <- Sys.time()

new_dat3$count <- ave(
  seq_len(nrow(new_dat3)),
  new_dat3$买家昵称,
  FUN = seq_along
)

Sys.time() - t0

head(new_dat3, 50)

tail(new_dat3, 50)

#计算

# Share of repeat purchases per 下单时点 value, taking purchase order into
# account: for every distinct value, the fraction of rows that are not the
# buyer's first purchase (count > 1, where count is now the running purchase
# number).
#
# The table is built in one step instead of rbind()-ing bare vectors onto an
# empty data frame, and which() keeps rows with a missing 下单时点 from
# turning every rate into NA.
time_slots <- sort(unique(dat$下单时点))

slot_rate <- vapply(time_slots, function(slot) {
  temp2 <- new_dat3$count[which(new_dat3$下单时点 == slot)]
  sum(temp2 > 1) / length(temp2)
}, numeric(1))

rate2 <- data.frame(as.numeric(time_slots), slot_rate)

colnames(rate2) <- c("下单时点", "rate")

rate2

#改进方法

# Days between each row's order date and the order date of the row above it
# (the first row is compared with itself, giving 0).
prev_date <- c(new_dat3$下单日期[1], new_dat3$下单日期[-nrow(new_dat3)])
new_dat3$t_diff <- as.integer(new_dat3$下单日期 - prev_date)

head(new_dat3)

# A buyer's first purchase has no previous purchase: set its interval to 0.
is_first <- new_dat3$count == 1
new_dat3$t_diff[is_first] <- 0

# Purchase number and interval for every repeat purchase.
mydata <- new_dat3 %>%
  select(count, t_diff) %>%
  filter(count > 1) %>%
  rename(rebuy = count, redays = t_diff)

head(mydata)

plot(mydata)

# Repurchases within 5 days: TRUE when the interval is under 5 days, with
# first purchases reset to 0.
new_dat3$m5 <- new_dat3$t_diff < 5

new_dat3$m5[is_first] <- 0

# Flag every order row with counts = 1 when, among the rows dated no later
# than m days after it, the same buyer has at least two rows (the row itself
# included); otherwise counts = 0.
setwd("H:/数据分析/内部数据/薏凡特旗舰店数据/旗舰店客户数据分析/0803/自我研究")

dat <- read.csv("kehushuju.csv", header = TRUE, encoding = "utf-8", colClasses = c("character", "Date", "integer", "numeric", "integer"))

head(dat)

library(dplyr)

dat1 <- arrange(dat, 下单日期)

head(dat1)

m <- 5  # repurchase window in days: m = 5 means within 5 days

n_orders <- nrow(dat1)

counts <- numeric(n_orders)

t0 <- Sys.time()

# Work buyer by buyer instead of filtering the whole table once per row.
# Column 1 is the buyer nickname and column 2 the order date.
order_day <- as.numeric(dat1[, 2])
rows_by_buyer <- split(seq_len(n_orders), dat1[, 1])

for (rows in rows_by_buyer) {
  days <- order_day[rows]
  # findInterval() returns, for each row, how many of the buyer's order
  # dates are <= that row's date + m. sort() drops missing dates.
  seen <- findInterval(days + m, sort(days))
  counts[rows] <- as.numeric(!is.na(seen) & seen >= 2)
}

tt <- Sys.time() - t0

head(counts)

end_dat5 <- cbind(dat1, counts)

write.csv(end_dat5, "H:/数据分析/内部数据/薏凡特旗舰店数据/旗舰店客户数据分析/0803/自我研究/end_dat5.csv")
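# 码农公寓 (apparently the name of the site this script was copied from)

# 相关文章 ("Related articles", apparently page navigation text, not code)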