Why do you want to combine all the functions into one? I would suggest keeping them separate and writing test.merge
so that it only merges the data from the two outputs.
#' Attach per-combination counts and summed `fg` to the rows of `col.interest`
#'
#' Groups `df` by the columns named in `Vars`, computes the number of rows
#' (`count`) and the sum of the `fg` column (`frequency.epi`) for every
#' combination, looks those statistics up for each row of `col.interest`, and
#' binds the first two columns of `df` in front of the result.
#'
#' @param df Data frame containing the columns named in `Vars` and a column
#'   `fg`. Its first two columns are carried into the result.
#' @param Vars Character vector of column names of `df` to group by.
#' @param col.interest Columns to match against the group summary; coerced
#'   with `as.data.frame()`. It is bound row-by-row to `df[1:2]`, so it must
#'   have one row per row of `df`, in the same order.
#' @return A data frame with `df[1:2]`, the matching key columns, `count`,
#'   `frequency.epi`, and any remaining columns of `col.interest`, with rows in
#'   the order of `col.interest`. Rows with no matching combination get `NA`
#'   statistics.
freq <- function(df, Vars, col.interest) {
  col.interest <- as.data.frame(col.interest)

  grouped <- dplyr::group_by(df, dplyr::across(dplyr::all_of(Vars)))
  resultat1 <- dplyr::summarise(
    grouped,
    count = dplyr::n(),
    frequency.epi = sum(fg),
    .groups = "drop"
  )

  # Same key choice as merge(): every column name the two tables share.
  by_cols <- intersect(names(resultat1), names(col.interest))

  # merge() sorts its result on the key columns, so binding that result to
  # df[1:2] paired each ID with the statistics of a different row. A left join
  # keeps the rows of col.interest in their original order instead.
  joined <- dplyr::left_join(col.interest, resultat1, by = by_cols)

  # Keep merge()'s column layout: keys, summary columns, then the rest.
  stat_cols <- setdiff(names(resultat1), by_cols)
  extra_cols <- setdiff(names(col.interest), by_cols)
  res <- as.data.frame(joined)[c(by_cols, stat_cols, extra_cols)]

  cbind(df[1:2], res)
}
#' List the (D row, R row) pairs whose mismatch is within a threshold
#'
#' For every row `i` of `R`, each non-ID column of `D` has the corresponding
#' value of `R[i, ]` subtracted from it, negative differences are set to 0,
#' and the remaining differences are summed per row of `D`. Pairs whose sum is
#' at most `threshold` are kept.
#'
#' @param D Data frame (or coercible); column 1 is an identifier, the other
#'   columns are the values to compare.
#' @param R Data frame (or coercible) laid out like `D`: identifier first,
#'   then the same number of value columns.
#' @param threshold Largest mismatch sum that is still kept.
#' @return A data frame with three columns: the identifier column of `D`
#'   (under its own name), `IDr` (the identifier of the `R` row) and
#'   `mismatch`. It has zero rows when `R` has no rows.
mis.test <- function(D, R, threshold) {
  D <- as.data.frame(D)
  R <- as.data.frame(R)
  n_d <- nrow(D)

  mismatch.i <- function(i) {
    # Column-wise D - R[i, ], with negative differences clamped to 0.
    excess <- Map(function(d, r) pmax(d - r, 0), D[-1], R[i, -1])
    pairs <- data.frame(
      D[1],
      IDr = rep(R[i, 1], n_d),
      mismatch = rowSums(do.call(cbind, excess)),
      check.names = FALSE
    )
    # which() drops rows whose mismatch is NA as well as those over threshold.
    pairs[which(pairs$mismatch <= threshold), ]
  }

  # seq_len() instead of 1:nrow(R): the latter iterates c(1, 0) for empty R.
  per_row <- lapply(seq_len(nrow(R)), mismatch.i)
  if (length(per_row) == 0L) {
    # Keep the three-column layout so a later merge on the ID still works.
    return(data.frame(
      D[0, 1, drop = FALSE],
      IDr = R[0, 1],
      mismatch = numeric(0),
      check.names = FALSE
    ))
  }
  as.data.frame(do.call(rbind, per_row))
}
#' Join two tables on their shared `IDd` column
#'
#' @param x,y Data frames that each contain a column named `IDd`.
#' @return The result of `merge()` on `IDd`: only rows whose `IDd` occurs in
#'   both inputs.
test.merge <- function(x, y) {
  key <- "IDd"
  merge(x = x, y = y, by.x = key, by.y = key)
}
# Example: mismatches of every row of data_D against the first row of data_R
# (threshold 2), joined with the per-combination frequencies of data_D.
test.merge(
  x = mis.test(
    D = data_D[, c(1, 3:7)],
    R = data_R[1, c(1, 3:7)],
    threshold = 2
  ),
  y = freq(
    df = data_D,
    Vars = colnames(data_D)[3:7],
    col.interest = data_D[3:7]
  )
)
# IDd IDr mismatch BTD A B C D E count frequency.epi
#1 1 1 2 A 0 0 1 1 0 5 0.0086
#2 2 1 2 B 0 0 1 1 0 5 0.0086
#3 3 1 2 AB 0 0 1 1 0 5 0.0086
#4 4 1 2 O 0 0 1 1 0 5 0.0086
#5 5 1 2 AB 0 0 1 1 0 5 0.0086
And here is the fix to your original code.
#' Mismatch table joined with per-combination frequencies, in one call
#'
#' Computes the within-threshold (D row, R row) pairs with the nested
#' `mis.test()` helper, computes row counts and summed `fg` per combination of
#' `Vars` with the nested `freq.epi()` helper, and merges both on `IDd`.
#'
#' @param D Data frame (or coercible); column 1 is the identifier, which must
#'   be named `IDd` for the final merge. The other columns are compared.
#' @param R Data frame (or coercible) laid out like `D`.
#' @param threshold Largest mismatch sum that is still kept.
#' @param DF Data frame containing the columns named in `Vars`, a column `fg`,
#'   and `IDd` among its first two columns.
#' @param Vars Character vector of column names of `DF` to group by.
#' @param col.interest Columns matched against the group summary; must have
#'   one row per row of `DF`, in the same order.
#' @return A data frame with one row per kept pair: `IDd`, `IDr`, `mismatch`,
#'   followed by the frequency columns for that `IDd`.
test.merge <- function(D, R, threshold, DF, Vars, col.interest) {
  # Coerce once here; the helpers below rely on plain data frames.
  R <- as.data.frame(R)
  D <- as.data.frame(D)
  DF <- as.data.frame(DF)
  col.interest <- as.data.frame(col.interest)

  # Count and summed `fg` per combination of `Vars`, looked up for every row
  # of `col.interest` and prefixed with the first two columns of `DF`.
  freq.epi <- function(DF, Vars, col.interest) {
    grouped <- dplyr::group_by(DF, dplyr::across(dplyr::all_of(Vars)))
    resultat1 <- dplyr::summarise(
      grouped,
      count = dplyr::n(),
      frequency.epi = sum(fg),
      .groups = "drop"
    )
    # Same key choice as merge(): every column name the two tables share.
    by_cols <- intersect(names(resultat1), names(col.interest))
    # merge() sorts on the key columns, which misaligned its rows with
    # DF[1:2]; a left join keeps the rows of col.interest in order.
    joined <- dplyr::left_join(col.interest, resultat1, by = by_cols)
    stat_cols <- setdiff(names(resultat1), by_cols)
    extra_cols <- setdiff(names(col.interest), by_cols)
    res <- as.data.frame(joined)[c(by_cols, stat_cols, extra_cols)]
    cbind(DF[1:2], res)
  }

  # (original note: same as remark 1 for the arguments)
  # Pairs of D rows and R rows whose clamped, summed difference is at most
  # `threshold`.
  mis.test <- function(D, R, threshold) {
    n_d <- nrow(D)
    mismatch.i <- function(i) {
      # Column-wise D - R[i, ], with negative differences clamped to 0.
      excess <- Map(function(d, r) pmax(d - r, 0), D[-1], R[i, -1])
      pairs <- data.frame(
        D[1],
        IDr = rep(R[i, 1], n_d),
        mismatch = rowSums(do.call(cbind, excess)),
        check.names = FALSE
      )
      pairs[which(pairs$mismatch <= threshold), ]
    }
    # seq_len() instead of 1:nrow(R): the latter iterates c(1, 0) for empty R.
    per_row <- lapply(seq_len(nrow(R)), mismatch.i)
    if (length(per_row) == 0L) {
      return(data.frame(
        D[0, 1, drop = FALSE],
        IDr = R[0, 1],
        mismatch = numeric(0),
        check.names = FALSE
      ))
    }
    as.data.frame(do.call(rbind, per_row))
  }

  merge(
    mis.test(D, R, threshold),
    freq.epi(DF, Vars, col.interest),
    by = "IDd"
  )
}
# Example: the same computation through the single combined function.
test.merge(
  D = data_D[, c(1, 3:7)],
  R = data_R[1, c(1, 3:7)],
  threshold = 2,
  DF = data_D,
  Vars = colnames(data_D)[3:7],
  col.interest = data_D[3:7]
)
I am sure this could be optimised and written in a better way (as suggested in the first part), but since I don’t know the bigger picture here, I’ll leave that to the OP.
Click here to find more solutions to related problems.