Verified commit 70632069, authored by Cody L Marquart
Browse files

Simplified merging of sets for tests

parent 4c9780f6
Pipeline #45848 passed with stages
in 3 minutes and 56 seconds
......@@ -124,7 +124,9 @@ Code = R6::R6Class("Code",
colnames(self$secondRaterSet) = c("ID", "X1");
}
if(is.null(computerSet)) {
self$computerSet = rep(NA, length(codeSet$excerpts));
# self$computerSet = rep(NA, length(codeSet$excerpts));
self$computerSet = matrix(ncol = 2, nrow = 0);
colnames(self$computerSet) = c("ID", "X1");
}
self$ignoredSet = ignoredSet;
......
......@@ -37,13 +37,13 @@ test <- function(code, kappa_threshold = 0.65, baserate_inflation = 0.2) {
first_v_second <- NULL
second_v_classifier <- NULL
if(nrow(code$testSet) > 0) {
to.test <- merge(code.to.use$computerSet, code.to.use$testSet, by = "ID")
to.test <- merge(code.to.use$computerSet, code.to.use$testSet, by = 1)
if(!any(is.na(to.test[, 2:3])))
first_v_classifier_test = calc_statistics(to.test)
}
if(nrow(code.to.use$trainingSet) > 0) {
to.test = merge(code.to.use$computerSet, code.to.use$trainingSet, by = "ID")
to.test = merge(code.to.use$computerSet, code.to.use$trainingSet, by = 1)
to.test[is.na(to.test[,3]), 3] = abs(to.test[is.na(to.test[,3]), 2] - 1)
first_v_classifier_train = calc_statistics(to.test)
......@@ -56,11 +56,11 @@ test <- function(code, kappa_threshold = 0.65, baserate_inflation = 0.2) {
if(nrow(code.to.use$secondRaterSet) > 0) {
first_set <- rbind(code.to.use$testSet, code.to.use$trainingSet)
to.test <- merge(first_set, code.to.use$secondRaterSet, by = "ID", all = TRUE)
to.test <- merge(first_set, code.to.use$secondRaterSet, by = 1, all = TRUE)
to.test <- to.test[rowSums(!is.na(to.test[,2:3]) * 1) > 1, ]
first_v_second <- calc_statistics(to.test)
to.test <- merge(code.to.use$computerSet, code.to.use$secondRaterSet, by = "ID", all = TRUE)
to.test <- merge(code.to.use$computerSet, code.to.use$secondRaterSet, by = 1, all = TRUE)
to.test <- to.test[rowSums(!is.na(to.test[,2:3]) * 1) > 1, ]
second_v_classifier <- calc_statistics(to.test)
}
......
......@@ -13,7 +13,7 @@ differences <- function(code = NULL, wh = "trainingSet", to = "computerSet") {
comparing = merge.sets(code, wh, to);
# comparing[comparing[,2] != comparing[,3], 1]
# comparing[rowSums(comparing[,-c(1)]) == 1, 1]
comparing[comparing[,2] != comparing[,3],1]
comparing[comparing[,2] != comparing[,3], 1]
}
merge.sets <- function(code, wh = "trainingSet", to = "computerSet") {
......@@ -30,9 +30,11 @@ merge.sets <- function(code, wh = "trainingSet", to = "computerSet") {
# compare.to = data.frame(ID=ids.to.use, X1=code[[to]][ids.to.use])
# }
merge(unique(code[[wh]]), unique(compare.to), sort = F, by = "ID")
# merge(unique(code[[wh]]), unique(compare.to), sort = F, by = "ID")
merge(unique(code[[wh]]), unique(compare.to), sort = F, by = 1)
} else {
data.frame(ID=NULL, X1=NULL)
# data.frame(ID=NULL, X1=NULL)
structure(list(ID = NULL, X1 = NULL, X2 = NULL), class = "data.frame")
}
}
......
......@@ -77,30 +77,14 @@ getHandSetIndices2 = function(
positives = positives - 1;
this.set = c(this.set, randIndice) # only adding positive indices to handset
# codeToUse$computerSet = rbind(codeToUse$computerSet, data.frame("ID" = randIndice, "X1" = autocoded))
codeToUse$computerSet = rbind(
codeToUse$computerSet,
structure(
list( ID=randIndice, X1=autocoded),
row.names=c(NA, -1L),
class="data.frame",
names = names(codeToUse$computerSet)
)
)
codeToUse$computerSet = rbind(codeToUse$computerSet, c(randIndice, autocoded))
}
else{ # deal with nonPositive excerpt case
if(maxNonPositives != 0){ # room in handSet to add nonPositive -> add it
this.set = c(this.set, randIndice)
maxNonPositives = maxNonPositives - 1;
# codeToUse$computerSet = rbind(codeToUse$computerSet, data.frame("ID" = randIndice, "X1" = autocoded))
codeToUse$computerSet = rbind(
codeToUse$computerSet,
structure(
list( ID=randIndice, X1=autocoded),
row.names=c(NA, -1L),
class="data.frame",
names = names(codeToUse$computerSet)
)
)
codeToUse$computerSet = rbind(codeToUse$computerSet, c(randIndice, autocoded))
}
else{ # if max number of nonPositve indices is reached, save indice, but don't add to set
codeToUse$touchedIndices = c(codeToUse$touchedIndices, randIndice)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment