作业流程(今日进度请参阅红框处)
设定资料集路径
2.1 我们有7个模型,每个模型输出3个机率表(官方800字内、官方800字外、测试赛),共21个。
2.2 机率表中有803个栏位,分别是1~800字机率、预测值、实际值及是否正确预测。
2.3 程序码
# Official "in-800" data set: read one probability table per model.
# The two tables whose file names end in "_韦智.csv" are read without an
# explicit fileEncoding; all the others are read as "UTF-8-BOM".
offical_in800_1 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/densenet201_v2_2/official_in_800.csv",
  fileEncoding = "UTF-8-BOM"
)
offical_in800_2 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/resnet152V2_v1_2/official_in_800.csv",
  fileEncoding = "UTF-8-BOM"
)
offical_in800_3 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/xception_v2_2/official_in_800.csv",
  fileEncoding = "UTF-8-BOM"
)
offical_in800_ex3 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/inceptionResNetV2_v1_2/official_in_800.csv",
  fileEncoding = "UTF-8-BOM"
)
offical_in800_ex4 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/densenet201_in800_official_韦智.csv"
)
offical_in800_ex5 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/prob炫斐/official_in_800.csv",
  fileEncoding = "UTF-8-BOM"
)
offical_in800_ex6 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/swa_v2/swa_v2_in800_official_韦智.csv"
)

# Give columns 801 and 802 (predicted value, actual value) the same names
# in every table so that later code can refer to them uniformly.
colnames(offical_in800_1)[801:802] <- c("predict_word", "origin_word")
colnames(offical_in800_2)[801:802] <- c("predict_word", "origin_word")
colnames(offical_in800_3)[801:802] <- c("predict_word", "origin_word")
colnames(offical_in800_ex3)[801:802] <- c("predict_word", "origin_word")
colnames(offical_in800_ex4)[801:802] <- c("predict_word", "origin_word")
colnames(offical_in800_ex5)[801:802] <- c("predict_word", "origin_word")
colnames(offical_in800_ex6)[801:802] <- c("predict_word", "origin_word")
# Official "not-in-800" data set: read one probability table per model.
# As with the in-800 tables, the two "_韦智.csv" files are read without an
# explicit fileEncoding.
offical_noin800_1 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/densenet201_v2_2/official_notin_800.csv",
  fileEncoding = "UTF-8-BOM"
)
offical_noin800_2 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/resnet152V2_v1_2/official_notin_800.csv",
  fileEncoding = "UTF-8-BOM"
)
offical_noin800_3 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/xception_v2_2/official_notin_800.csv",
  fileEncoding = "UTF-8-BOM"
)
offical_noin800_ex3 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/inceptionResNetV2_v1_2/official_notin_800.csv",
  fileEncoding = "UTF-8-BOM"
)
offical_noin800_ex4 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/densenet201_notin800_official_韦智.csv"
)
offical_noin800_ex5 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/prob炫斐/official_notin_800.csv",
  fileEncoding = "UTF-8-BOM"
)
offical_noin800_ex6 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/swa_v2/swa_v2_notin800_official_韦智.csv"
)
# Test-competition section.
# NOTE(review): every statement below reads an official_notin_800 file into
# an offical_noin800_* variable, i.e. the same files and the same variable
# names as the "not-in-800" section. Presumably this section was meant to
# load the test-competition tables into their own variables (get_new_model
# has a branch for dataset == 'test_data') -- confirm the intended paths and
# variable names before relying on it.
offical_noin800_1 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/densenet201_v2_2/official_notin_800.csv",
  fileEncoding = "UTF-8-BOM"
)
offical_noin800_2 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/resnet152V2_v1_2/official_notin_800.csv",
  fileEncoding = "UTF-8-BOM"
)
offical_noin800_3 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/xception_v2_2/official_notin_800.csv",
  fileEncoding = "UTF-8-BOM"
)
offical_noin800_ex3 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/inceptionResNetV2_v1_2/official_notin_800.csv",
  fileEncoding = "UTF-8-BOM"
)
offical_noin800_ex4 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/densenet201_notin800_official_韦智.csv"
)
offical_noin800_ex5 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/prob炫斐/official_notin_800.csv",
  fileEncoding = "UTF-8-BOM"
)
offical_noin800_ex6 <- read.csv(
  file = "C:/Users/wooden/Desktop/dl/probCSV/swa_v2/swa_v2_notin800_official_韦智.csv"
)
找出每个中文字的阈值
3.1 定义function:找出阈值最小值&平均机率
# Per-word accuracy, minimum probability and mean probability (used as
# candidate thresholds).
#
# Args:
#   data_prob: data frame whose first `n_class` columns are per-word
#              probabilities (column name = word) and which also has the
#              columns `predict_word` and `origin_word`.
#   n_class:   number of leading probability columns (default 800).
#
# Returns a data frame with one row per word and the columns
#   word, acc, min_prob, n, mean_prob
# where `n` is the number of rows whose origin_word is that word, `acc` is
# the share of those rows that were predicted correctly (rounded to 4
# digits; NaN when n is 0), and min_prob / mean_prob are the minimum / mean
# of the word's own probability column over the correctly predicted rows
# (0 when there is no correct prediction).
get_acc_min <- function(data_prob, n_class = 800) {
  stopifnot(
    is.data.frame(data_prob),
    ncol(data_prob) >= n_class,
    all(c("predict_word", "origin_word") %in% names(data_prob))
  )

  word <- unique(names(data_prob)[seq_len(n_class)])

  # Compare as character so factor columns with different level sets work.
  origin <- as.character(data_prob[["origin_word"]])
  predicted <- as.character(data_prob[["predict_word"]])
  is_correct <- predicted == origin
  # A missing prediction or label counts as "not correct".
  is_correct[is.na(is_correct)] <- FALSE

  n_word <- length(word)
  n <- integer(n_word)
  acc <- numeric(n_word)
  # Pre-filled with 0: the value reported when a word has no correct row.
  min_prob <- numeric(n_word)
  mean_prob <- numeric(n_word)

  for (i in seq_along(word)) {
    rows <- which(origin == word[i])
    hit_rows <- rows[is_correct[rows]]

    n[i] <- length(rows)
    acc[i] <- round(length(hit_rows) / n[i], 4)

    if (length(hit_rows) > 0) {
      # Only the word's own probability column is needed, so index that
      # column directly instead of copying a full-width data-frame subset.
      hit_prob <- as.numeric(data_prob[[word[i]]][hit_rows])
      min_prob[i] <- min(hit_prob)
      mean_prob[i] <- mean(hit_prob)
    }
  }

  data.frame(
    word = word,
    acc = acc,
    min_prob = min_prob,
    n = n,
    mean_prob = mean_prob
  )
}
3.2 找出800个字的阈值,汇整后储存CSV档案
# Per-word accuracy / minimum probability / mean probability for each of the
# seven in-800 probability tables.
offical_in800_1_summary   <- get_acc_min(offical_in800_1)
offical_in800_2_summary   <- get_acc_min(offical_in800_2)
offical_in800_3_summary   <- get_acc_min(offical_in800_3)
offical_in800_ex3_summary <- get_acc_min(offical_in800_ex3)
offical_in800_ex4_summary <- get_acc_min(offical_in800_ex4)
offical_in800_ex5_summary <- get_acc_min(offical_in800_ex5)
offical_in800_ex6_summary <- get_acc_min(offical_in800_ex6)
# Combine the seven per-model summaries into one wide table `final`:
# word, n, then acc_*, min_prob_* and mean_prob_* for every model.
# `word` and `n` are taken from model 1's summary only.
# NOTE(review): the columns are combined by row position, which assumes
# every summary lists the words in the same order -- confirm that all
# probability tables share the same column order.
final <- data.frame(
  word = offical_in800_1_summary$word,
  n = offical_in800_1_summary$n,
  # accuracy per model
  acc_1 = offical_in800_1_summary$acc,
  acc_2 = offical_in800_2_summary$acc,
  acc_3 = offical_in800_3_summary$acc,
  acc_ex3 = offical_in800_ex3_summary$acc,
  acc_ex4 = offical_in800_ex4_summary$acc,
  acc_ex5 = offical_in800_ex5_summary$acc,
  acc_ex6 = offical_in800_ex6_summary$acc,
  # minimum probability per model
  min_prob_1 = offical_in800_1_summary$min_prob,
  min_prob_2 = offical_in800_2_summary$min_prob,
  min_prob_3 = offical_in800_3_summary$min_prob,
  min_prob_ex3 = offical_in800_ex3_summary$min_prob,
  min_prob_ex4 = offical_in800_ex4_summary$min_prob,
  min_prob_ex5 = offical_in800_ex5_summary$min_prob,
  min_prob_ex6 = offical_in800_ex6_summary$min_prob,
  # mean probability per model
  mean_prob_1 = offical_in800_1_summary$mean_prob,
  mean_prob_2 = offical_in800_2_summary$mean_prob,
  mean_prob_3 = offical_in800_3_summary$mean_prob,
  mean_prob_ex3 = offical_in800_ex3_summary$mean_prob,
  mean_prob_ex4 = offical_in800_ex4_summary$mean_prob,
  mean_prob_ex5 = offical_in800_ex5_summary$mean_prob,
  mean_prob_ex6 = offical_in800_ex6_summary$mean_prob
)
# Save the table: word label + occurrence count n + accuracy, minimum
# probability and mean probability for each of the 7 models.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved constant.
write.csv(final, file = "C:/Users/wooden/Desktop/dl/model/model_weight_V3.csv", row.names = FALSE)
3.3 输出结果(以CSV档显示)
任意选择奇数个模型组合后,产生组合权重表,并利用模型权重得到新的机率表。
4.1 定义function:任意组合模型(奇数个)。
# Enumerate every on/off combination of n models.
#
# Returns a numeric 2^n x n matrix of 0/1 values. Row k (counting from 0) is
# the n-bit binary representation of k, most significant bit in column 1.
# Note that ALL 2^n combinations are returned; keeping only the combinations
# with an odd number of models is left to the caller (e.g. by filtering on
# rowSums).
#
# Args:
#   n: number of models (a single non-negative number). n = 0 yields a
#      1 x 0 matrix.
BitMatrix <- function(n) {
  stopifnot(is.numeric(n), length(n) == 1, n >= 0)
  set <- 0:(2^n - 1)
  rst <- matrix(0, ncol = n, nrow = 2^n)
  # seq_len() so that n = 0 skips the loop instead of iterating over c(1, 0).
  for (i in seq_len(n)) {
    # Bit i (from the left) of each value: shift right by n - i, take parity.
    rst[, i] <- (set %/% 2^(n - i)) %% 2
  }
  rst
}
4.2 定义function:以官方800字内资料集机率表,组合模型后产出权重表,并利用模型权重得到新的机率表。
# Build a weighted ensemble of several models' probability tables.
#
# Objects read by name from the enclosing environment:
#   final            per-word statistics table with the columns `word`,
#                    acc_<k>, min_prob_<k> and mean_prob_<k> for each model k
#   <dataset>_<k>    probability table of model k (first `n_class` columns
#                    are the per-word probabilities)
# `final` itself is not modified; all additions go to a local copy.
#
# Args:
#   namesmodel: model suffixes to combine, e.g. c(1, 2, "ex3").
#   stat:       "acc" weights each model by its per-word accuracy; any other
#               value weights by its per-word mean probability.
#   dataset:    variable-name prefix of the probability tables.
#   n_class:    number of leading probability columns (default 800).
#
# Returns an unnamed list of two elements:
#   [[1]] the weighted sum of the models' probability columns. Unless
#         dataset is "offical_noin800" it has an extra column `acc`
#         (1 if the top-scoring word equals origin_word, else 0); when
#         dataset is "test_data" it also has `origin_word`. Both use the
#         origin_word column of the LAST model in `namesmodel`.
#   [[2]] the copy of `final` extended with wei_<k> for each model,
#         min_prob_new and mean_prob_new (weighted sums of the per-model
#         min_prob_<k> / mean_prob_<k>).
get_new_model <- function(namesmodel = c(1), stat = "acc",
                          dataset = "offical_in800", n_class = 800) {
  stopifnot(length(namesmodel) > 0)

  stat_prefix <- if (stat == "acc") "acc_" else "mean_prob_"
  stat_cols <- paste0(stat_prefix, namesmodel)
  wei_cols <- paste0("wei_", namesmodel)

  new_stat <- final
  needed <- c(
    "word", stat_cols,
    paste0("min_prob_", namesmodel), paste0("mean_prob_", namesmodel)
  )
  missing_cols <- setdiff(needed, names(new_stat))
  if (length(missing_cols) > 0) {
    stop("`final` lacks column(s): ", paste(missing_cols, collapse = ", "),
         call. = FALSE)
  }

  # Denominator of every weight: the sum of the chosen statistic over all
  # selected models. `[[` matches column names exactly (no partial matching).
  stat_total <- Reduce(`+`, lapply(stat_cols, function(col) new_stat[[col]]))

  result <- NULL
  model_data <- NULL
  for (i in seq_along(namesmodel)) {
    model_data <- get(paste0(dataset, "_", namesmodel[i]), envir = environment())
    new_stat[[wei_cols[i]]] <- new_stat[[stat_cols[i]]] / stat_total

    # One identical row of per-word weights for every row of the table.
    wei_matrix <- matrix(
      new_stat[[wei_cols[i]]],
      nrow = nrow(model_data), ncol = n_class, byrow = TRUE
    )
    weighted <- model_data[, seq_len(n_class), drop = FALSE] * wei_matrix
    result <- if (is.null(result)) weighted else result + weighted
  }

  # From here on `model_data` is the table of the last model in `namesmodel`.
  if (dataset != "offical_noin800") {
    maxindex <- apply(result, 1, which.max)
    result$acc <- ifelse(
      model_data$origin_word == new_stat$word[maxindex], 1, 0
    )
  }
  if (dataset == "test_data") {
    result$origin_word <- model_data$origin_word
  }

  # Weighted sum over the selected models of <prefix><k> * wei_<k>.
  weighted_stat <- function(prefix) {
    Reduce(`+`, lapply(seq_along(namesmodel), function(j) {
      new_stat[[paste0(prefix, namesmodel[j])]] * new_stat[[wei_cols[j]]]
    }))
  }
  new_stat$min_prob_new <- weighted_stat("min_prob_")
  new_stat$mean_prob_new <- weighted_stat("mean_prob_")

  list(result, new_stat)
}
4.3 输出结果(以CSV档显示)
模型组合权重表(红框处为组合权重)
新机率表(红框处代表是否正确预测,正确预测为1;错误预测为0)
判断isnull
5.1 定义function
# Decide, row by row, whether a prediction should be treated as "isnull".
#
# A row is flagged 1 when its highest probability is below the threshold of
# the word that scored highest, and 0 otherwise.
#
# Two modes:
#   * new_data and new_stat both NULL (default): each model in `namesmodel`
#     is judged on its own table `<dataset>_<k>` with thresholds taken from
#     `final$min_prob_<k>` (or `final$mean_prob_<k>`); both are looked up in
#     the enclosing environment. The per-model flags are averaged and the
#     row is flagged 1 when at least half of the models flagged it.
#   * new_data and new_stat both supplied: the combined table `new_data` is
#     judged against `new_stat$min_prob_new` (or `new_stat$mean_prob_new`);
#     `namesmodel` and `dataset` are not used.
#
# Args:
#   namesmodel: model suffixes, e.g. c(1, 2, "ex3").
#   stat:       "min_prob" uses the minimum-probability thresholds; any
#               other value uses the mean-probability thresholds.
#   dataset:    variable-name prefix of the per-model probability tables.
#   new_data:   combined probability table, or NULL.
#   new_stat:   statistics table holding min_prob_new / mean_prob_new, or
#               NULL.
#   n_class:    number of leading probability columns (default 800).
#
# Returns a vector of 0/1 flags, one per row of the probability table.
get_min01 <- function(namesmodel = c(1), stat = "min_prob",
                      dataset = "offical_in800", new_data = NULL,
                      new_stat = NULL, n_class = 800) {
  stat_prefix <- if (stat == "min_prob") "min_prob_" else "mean_prob_"

  # 1 where a row's top probability is below the threshold of its top word.
  flag_below_threshold <- function(prob_table, threshold) {
    apply(prob_table, 1, FUN = function(x) {
      maxindex <- which.max(x)
      ifelse(x[maxindex] >= threshold[maxindex], 0, 1)
    })
  }

  if (is.null(new_data) && is.null(new_stat)) {
    stopifnot(length(namesmodel) > 0)
    votes <- NULL
    for (i in seq_along(namesmodel)) {
      model_data <- get(paste0(dataset, "_", namesmodel[i]),
                        envir = environment())
      threshold <- final[[paste0(stat_prefix, namesmodel[i])]]
      if (is.null(threshold)) {
        stop("`final` lacks column ", stat_prefix, namesmodel[i],
             call. = FALSE)
      }
      flags <- flag_below_threshold(
        model_data[, seq_len(n_class), drop = FALSE], threshold
      )
      votes <- if (is.null(votes)) flags else votes + flags
    }
    # Majority vote; a tie (exactly half of the models) counts as flagged.
    result <- ifelse(votes / length(namesmodel) >= 0.5, 1, 0)
    return(result)
  }

  if (is.null(new_data) || is.null(new_stat)) {
    stop("`new_data` and `new_stat` must be supplied together",
         call. = FALSE)
  }
  threshold <- new_stat[[paste0(stat_prefix, "new")]]
  if (is.null(threshold)) {
    stop("`new_stat` lacks column ", stat_prefix, "new", call. = FALSE)
  }
  flag_below_threshold(new_data[, seq_len(n_class), drop = FALSE], threshold)
}
让我们继续看下去...
>>: Day 21- To Do List (8) 利用 HTML Template 呈现资料
今天要来感谢很多事情,把所有的感谢都奉上,以及我先前做的程序码,提供给大家参考。 Photo by ...
今天预计讲解下面两个 (也就是下图的步骤 5) API 的 JSON 内容 把内容加上 Nonce ...
完结洒花 当初只是想藉这个机会督促自己学新东西,还真的没想到能够完赛 xD 这是我第一次写技术文章 ...
今天大概会聊到的范围 layout modifier 上一次讨论到 Modifier 时,觉得自己...
当需要把资料放在一起时,就会需要 Array (阵列)。 小提醒:阵列不是原始资料型别之一。 当有很...