# (Contribution: 60%)
# Setup: load dplyr and read the two CSV tables used by the rest of the script.
# The original `rm(list = ls())` was removed: every object the visible script
# reads is assigned first, so wiping the caller's workspace is unnecessary.
# dplyr is installed only when it is missing instead of on every run.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

# Main input table; printed for interactive inspection.
input <- read.csv('C:\\Users\\이해리\\Desktop\\multi\\bosomi_01.28.csv', sep = ",")
input
# Reference table (later merged on `option_nm`); printed for inspection.
input_ref <- read.table('C:\\Users\\이해리\\Desktop\\multi\\bosomi_ref_01.28.csv', sep = ",", header = TRUE)
input_ref
# Keep the identifying columns so `seq` can be re-attached to the final result.
input_seq <- input %>%
  select("seq", "pid", "hashcode")
# Re-extract every column except seq (columns 2 to 10).
input <- input[, c(2:10)]
# Drop the first two remaining columns, then keep the first row for each
# (hashcode, title, option_name) combination.
input_unique_hash <- input[, c(-1, -2)] %>%
  distinct(hashcode, title, option_name, .keep_all = TRUE)
# Reshape to long form: columns 4, 5 and 6 of input_unique_hash are each paired
# with columns 1-3 and stacked under the common column name "option_nm".
option_slices <- lapply(4:6, function(option_col) {
  slice <- input_unique_hash[, c(1:3, option_col)]
  names(slice)[4] <- "option_nm"
  slice
})
# Left fold starting from an empty data frame, i.e. the same sequence of
# rbind() calls as appending one slice at a time.
input_trans <- Reduce(rbind, option_slices, data.frame(matrix(nrow = 0, ncol = 0)))
# Attach the reference attributes to every option name. all.x = TRUE keeps
# option names without a reference entry; their reference columns become NA.
input_attr_merged <- merge(input_trans, input_ref, by = c("option_nm"), all.x = TRUE)
#str(input_attr_merged)
# Convert factor columns to character so comparisons and paste() work on text.
input_attr_merged[] <- lapply(
  input_attr_merged,
  function(x) if (is.factor(x)) as.character(x) else x
)
# Drop gift rows ("증정"). Rows whose pid_title_ref is NA (no reference match)
# are dropped as well: indexing with an NA condition would otherwise replace
# each of them with a row made entirely of NA values.
keep_row <- !is.na(input_attr_merged$pid_title_ref) &
  input_attr_merged$pid_title_ref != "증정"
input_attr_merged <- input_attr_merged[keep_row, ]
# Row count per (hashcode, pid_title_ref, title, option_name, option_nm).
# Alternative kept from the original (restrict to mark == 99 first):
#cntofopt_nm <- input_attr_merged[input_attr_merged$mark == 99,] %>%
cntofopt_nm <- summarise(
  group_by(input_attr_merged, hashcode, pid_title_ref, title, option_name, option_nm),
  cnt_opt_nm = n()
)

# Number of cntofopt_nm rows (one per option_nm) within each
# (hashcode, pid_title_ref, title, option_name).
cntofmulti_meta <- summarise(
  group_by(cntofopt_nm, hashcode, pid_title_ref, title, option_name),
  cnt_multi_meta = n()
)

# Number of cntofmulti_meta rows (one per pid_title_ref) within each
# (hashcode, title, option_name).
cntofset <- summarise(
  group_by(cntofmulti_meta, hashcode, title, option_name),
  cnt_set = n()
)
# One row per (hashcode, pid_title_ref, title, option_name, option_nm),
# keeping the remaining columns of the first occurrence.
input_attr <- input_attr_merged %>%
  distinct(hashcode, pid_title_ref, title, option_name, option_nm, .keep_all = TRUE)
# Attach the three count tables (cnt_opt_nm, cnt_set, cnt_multi_meta).
input_attr <- merge(input_attr, cntofopt_nm, by = c('hashcode', 'pid_title_ref', 'title', 'option_name', 'option_nm'), all.x = TRUE)
input_attr <- merge(input_attr, cntofset, by = c('hashcode', 'title', 'option_name'), all.x = TRUE)
input_attr <- merge(input_attr, cntofmulti_meta, by = c('hashcode', 'pid_title_ref', 'title', 'option_name'), all.x = TRUE)
str(input_attr)
# Group count scaled by how many times the same option name occurred.
input_attr$cs_group_count_ref <- as.integer(input_attr$cs_group_count_ref)
input_attr$cs_group_count_same <- input_attr$cs_group_count_ref * input_attr$cnt_opt_nm
# Sort so rows of the same (hashcode, pid_title_ref) are adjacent; the
# row-by-row loops that follow compare each row with the one before it.
# (The stray console prompt "> " that preceded this statement was removed.)
input_attr <- input_attr[order(input_attr$hashcode, input_attr$pid_title_ref, input_attr$title, input_attr$option_name, input_attr$option_nm), ]
str(input_attr)
nrow(input_attr)
# num_pid: running position of each row within a consecutive run of rows that
# share hashcode and pid_title_ref (1 for the first row of a run).
# rep() keeps the assignment valid when input_attr has zero rows.
input_attr$num_pid <- rep(1, nrow(input_attr))
# seq_len(n)[-1] is empty for n <= 1, whereas 2:n would count backwards.
for (loop_nrow in seq_len(nrow(input_attr))[-1]) {
  same_run <- input_attr$hashcode[loop_nrow] == input_attr$hashcode[loop_nrow - 1] &
    input_attr$pid_title_ref[loop_nrow] == input_attr$pid_title_ref[loop_nrow - 1]
  # ifelse() is kept so an NA comparison yields NA, as before.
  input_attr$num_pid[loop_nrow] <- ifelse(same_run, input_attr$num_pid[loop_nrow - 1] + 1, 1)
}
# cnt_pid: used to decide which rows are extracted.
# input_attr holds exact matches only, so every row is a confirmed value
# (mark = 99).
# set_gift, set_multi_meta: columns renamed and defined here.
#input_attr$cs_unit_count_final <- NULL
multi_meta_count <- input_attr[["cnt_multi_meta"]]
input_attr[["cnt_pid"]] <- multi_meta_count - input_attr[["num_pid"]]
input_attr[["mark"]] <- 99
input_attr[["set_gift"]] <- input_attr[["cnt_set"]]
# 0 when the group has a single meta row, 1 otherwise.
input_attr[["set_multi_meta"]] <- ifelse(multi_meta_count == 1, 0, 1)
# For multi-meta groups, build the meta values: the first row of a run
# (num_pid == 1) takes its own source value, and every later row takes the
# previous row's result joined with its own source value by "|".
# Names are the output columns (created in this order); values are the
# source columns they are built from.
meta_sources <- c(
  cs_unit_count = "cs_unit_count_ref",
  cs_group_count = "cs_group_count_same",
  cs_extra_count = "cs_extra_count_ref",
  cs_goods_unit_base_size = "cs_goods_unit_base_size_ref",
  cs_type_count = "cs_type_count_ref",
  cs_grade_count = "cs_grade_count_ref",
  cs_gender_count = "cs_gender_count_ref",
  cs_weight_count = "cs_weight_count_ref",
  cs_box_count = "cs_box_count_ref"
)
for (meta_col in names(meta_sources)) {
  source_values <- input_attr[[meta_sources[[meta_col]]]]
  # Start from the source vector so the column keeps its original type
  # unless a "|"-joined string is actually written into it.
  joined_values <- source_values
  # seq_along() makes the loop a no-op on an empty table (1:nrow would not).
  for (loop_nrow in seq_along(joined_values)) {
    position <- input_attr$num_pid[loop_nrow]
    if (is.na(position)) {
      # An undetermined position yields NA, as ifelse() did with an NA test.
      joined_values[loop_nrow] <- NA
    } else if (position != 1) {
      joined_values[loop_nrow] <- paste(joined_values[loop_nrow - 1],
                                        source_values[loop_nrow], sep = "|")
    }
  }
  input_attr[[meta_col]] <- joined_values
}
# Keep the rows where cnt_pid == 0, i.e. num_pid equals cnt_multi_meta.
# which() drops rows whose cnt_pid is NA; a bare logical index would turn
# each of them into a row made entirely of NA values.
matched <- input_attr[which(input_attr$cnt_pid == 0), ]
# Positional selection of columns 1-4 and 21-32.
# NOTE(review): presumably the four key columns plus mark, set_gift,
# set_multi_meta and the nine cs_* meta columns; this depends on the column
# layout of the reference table, so confirm before changing either.
matched <- matched[c(1:4, 21:32)]
str(matched)
# The second column is renamed to 'pid' so it can serve as a merge key.
names(matched)[2] <- 'pid'
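# In the SKU tool, depending on the filters (required/excluded keywords, etc.)
# and the displayed product name, some items may not arrive at the
# corresponding node.
# (Items whose displayed product name lacks keywords such as the brand
# (required keywords, etc.) do not arrive, so they are forcibly duplicated.)
# The generated matched table includes the cases above, so it may contain rows
# that are not present in input.
# Among the rows outside the intersection of input and matched:
#   - present only in input   -> quarantine (mark = 11) and fill the meta
#                                columns with default values
#   - present only in matched -> the displayed product name lacks keywords
#                                such as the brand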
# Full outer join of the input rows with the matched meta rows.
input_matched <- merge(input, matched, by = c('hashcode', 'pid', 'title', 'option_name'), all = TRUE)
# Rows without a mark after the join get mark = 11.
input_matched[["mark"]] <- ifelse(is.na(input_matched[["mark"]]), 11, input_matched[["mark"]])
#export <- merge(input, matched, by = c('hashcode', 'pid_title', 'title', 'option_name'), all.x = TRUE)
#export$mark <- ifelse(is.na(export$mark),11,export$mark)

# Default values written into every mark == 11 row, keyed by column name.
quarantine_defaults <- c(
  set_gift = 1,
  set_multi_meta = 0,
  cs_unit_count = 9999,
  cs_group_count = 9999,
  cs_extra_count = 9999,
  cs_goods_unit_base_size = 9999,
  cs_type_count = 9999,
  cs_grade_count = 9999,
  cs_gender_count = 9999,
  cs_weight_count = 9999,
  cs_box_count = 9999
)
is_quarantined <- input_matched[["mark"]] == 11
for (default_col in names(quarantine_defaults)) {
  input_matched[[default_col]] <- ifelse(
    is_quarantined,
    quarantine_defaults[[default_col]],
    input_matched[[default_col]]
  )
}
# Distinct (pid, pid_title, market_pids) combinations from input, used to
# attach pid_title and market_pids by pid.
input_add <- input %>%
  distinct(pid, pid_title, market_pids)
input_matched_1 <- merge(input_matched, input_add, by = "pid", all.x = TRUE)
# Drop column 5 by position, then name the last two columns.
# NOTE(review): presumably column 5 is the pid_title copy that came from
# input_matched and the last two are the copies from input_add; confirm
# against the actual column layout.
input_matched_1 <- input_matched_1[, -5]
names(input_matched_1)[(ncol(input_matched_1) - 1)] <- 'pid_title'
names(input_matched_1)[ncol(input_matched_1)] <- 'market_pids'
# Re-attach seq by (hashcode, pid) and put the columns in export order.
input_matched_final <- merge(input_matched_1, input_seq, by = c("hashcode", "pid"), all.x = TRUE)
input_matched_final <- input_matched_final %>%
  select(
    "seq", "pid", "pid_title", "hashcode", "title", "option_name",
    "market_pids", "mark", "set_gift", "set_multi_meta",
    "cs_unit_count", "cs_group_count", "cs_extra_count",
    "cs_goods_unit_base_size", "cs_type_count", "cs_grade_count",
    "cs_gender_count", "cs_weight_count", "cs_box_count"
  )
# Write the result. (The stray markdown fence that followed this call and
# broke parsing was removed.)
write.csv(input_matched_final, "C:\\Users\\이해리\\Desktop\\multi\\bosomi_result_01.28.csv", row.names = FALSE)