Verified Commit 012b7fd4 authored by Cody L Marquart's avatar Cody L Marquart
Browse files

bug in handset indices

parent bd22b541
Pipeline #46505 failed with stages
in 2 minutes and 24 seconds
......@@ -28,7 +28,12 @@ calc_statistics <- function(set, kappa_threshold = 0.65, baserate_inflation = 0.
#'
#' @return code object
#' @export
test <- function(code, kappa_threshold = 0.65, baserate_inflation = 0.2) {
test <- function(code, kappa_threshold = 0.65, baserate_inflation = 0.2, ...) {
args <- list(...)
if(!is.null(args$kappaThreshold)) {
warning("Use kappa_threshold instead of kappaThreshold. kappaThreshold will be deprecated in a future version.")
kappa_threshold <- args$kappaThreshold
}
code.to.use <- code$clone(deep=T);
code.to.use$baserateInflation <- baserate_inflation;
......
......@@ -75,13 +75,13 @@ getHandSetIndices2 = function(
autocoded = codeToUse$process(codeToUse$excerpts[randIndice])
if(autocoded == 1){
positives = positives - 1;
# this.set = c(this.set, randIndice) # only adding positive indices to handset
this.set = c(this.set, randIndice) # only adding positive indices to handset
# codeToUse$computerSet = rbind(codeToUse$computerSet, data.frame("ID" = randIndice, "X1" = autocoded))
codeToUse$computerSet = rbind(codeToUse$computerSet, c(randIndice, autocoded))
}
else{ # deal with nonPositive excerpt case
if(maxNonPositives != 0){ # room in handSet to add nonPositive -> add it
# this.set = c(this.set, randIndice)
this.set = c(this.set, randIndice)
maxNonPositives = maxNonPositives - 1;
# codeToUse$computerSet = rbind(codeToUse$computerSet, data.frame("ID" = randIndice, "X1" = autocoded))
codeToUse$computerSet = rbind(codeToUse$computerSet, c(randIndice, autocoded))
......@@ -98,7 +98,7 @@ getHandSetIndices2 = function(
randIndice = sample(indices[!is.na(indices)],1)
autocoded = codeToUse$process(codeToUse$excerpts[randIndice])
indices[randIndice] = NA
# this.set = c(this.set, randIndice)
this.set = c(this.set, randIndice)
codeToUse$computerSet = rbind(codeToUse$computerSet, c(randIndice, autocoded))
}
}
......@@ -108,10 +108,18 @@ getHandSetIndices2 = function(
}
# randomize output order of indices
# codeToUse$testSet = rbind(
# codeToUse$testSet,
# #data.frame(ID=this.set[sample.int(n = handSetLength)], X1=NA)
# data.frame(ID=codeToUse$computerSet[,1][sample.int(n = handSetLength)], X1=NA)
# )
codeToUse$testSet = rbind(
codeToUse$testSet,
#data.frame(ID=this.set[sample.int(n = handSetLength)], X1=NA)
data.frame(ID=codeToUse$computerSet[,1][sample.int(n = handSetLength)], X1=NA)
matrix(
c(c(this.set), rep(NA, handSetLength)),
ncol = 2, nrow = handSetLength,
byrow = F, dimnames = list(NULL, colnames(codeToUse$testSet))
)
)
}
else {
......
......@@ -9,7 +9,7 @@ newcode = create.code(name = "Data", expressions = c("number","data"), excerpts
newcode.h = handcode(code = newcode, excerpts = rs$text, n = 4)
# Run test to see rho/kappa of current test set
newcode.t = test(code = newcode.h, kappaThreshold = 0.65)
newcode.t = test(code = newcode.h, kappa_threshold = 0.65)
# View the summary, with the calcuated statistics
summary(newcode.t)
......
data(RS.data)
rs_text = RS.data$text
test_that("Verify old parameter", {
name <- "Data"
set <- 10:15
exprs <- c("number","priority")
coded <- sapply(rs_text[set], grepl, pattern = paste0(exprs, collapse = "|")) * 1
newcode <- create.code(name, expressions = exprs, excerpts = rs_text) %>%
handcode(this.set = set, results = 0)
testthat::expect_warning(test(code = newcode, kappaThreshold = 0.9))
})
test_that("Verify statistics output", {
name <- "Data"
set <- 10:15
......@@ -11,17 +22,16 @@ test_that("Verify statistics output", {
handcode(this.set = set, results = 0) %>%
test()
expect_equal(length(newcode$statistics), 1)
testthat::expect_equal(length(newcode$statistics), 1)
first_stats <- newcode$statistics[[1]]
expect_equal(first_stats$one_v_classifier$test_set$N, length(set))
expect_equal(
testthat::expect_equal(first_stats$one_v_classifier$test_set$N, length(set))
testthat::expect_equal(
first_stats$one_v_classifier$test_set$set[,2],
coded
)
})
test_that("Verify second rater stats", {
name <- "Data"
set <- 10:15
......@@ -40,11 +50,9 @@ test_that("Verify second rater stats", {
gold_set <- newcode$statistics[[1]]$one_v_two$test_set$set[,2]
silv_set <- newcode$statistics[[1]]$one_v_two$test_set$set[,3]
expect_equal(gold_set, rep(0, length(set)))
expect_equal(silv_set, second_set)
testthat::expect_equal(gold_set, rep(0, length(set)))
testthat::expect_equal(silv_set, second_set)
})
test_that("Verify stats history", {
name <- "Data"
set <- 10:15
......@@ -61,7 +69,6 @@ test_that("Verify stats history", {
second_stats <- newcode$statistics[[2]]
expect_equal(length(newcode$statistics), 2)
})
test_that("Verify clearing of test set", {
name <- "Data"
set <- 10:15
......@@ -81,4 +88,4 @@ test_that("Verify clearing of test set", {
second_stats <- newcode_w_training$statistics[[2]]
expect_equal(newcode_w_training$trainingSet, newcode$testSet)
})
\ No newline at end of file
})
library(magrittr)
data(RS.data)
rs_text = RS.data$text
test_that("multiplication works", {
test_that("Basic creation", {
name = "Data"
newcode = create.code(name, expressions = c("number","data"), excerpts = rs_text)
testthat::expect_equal(newcode$name, name)
})
test_that("Verify holdout sets", {
name <- "Data"
newcode <- create.code(name, expressions = c("number","data"), excerpts = rs_text)
touchable_perc <- length(newcode$touchableExcerpts) / length(newcode$excerpts)
holdout_perc <- length(newcode$holdoutExcerpts) / length(newcode$excerpts)
testthat::expect_equal(0.1, touchable_perc, tolerance = 0.01)
testthat::expect_equal(0.9, holdout_perc, tolerance = 0.01)
})
test_that("Check the touched sets", {
name <- "Data"
set <- 10:15
exprs <- c("number","priority")
coded <- sapply(rs_text[set], grepl, pattern = paste0(exprs, collapse = "|")) * 1
newcode <- create.code(name, expressions = exprs, excerpts = rs_text) %>%
getHandSetIndices2()
newcode2 <- getHandSetIndices2(newcode)
testthat::expect_gt(length(newcode$touchedIndices), expected = 0)
touchable_perc <- length(newcode$touchableExcerpts) / length(newcode$excerpts)
holdout_perc <- length(newcode$holdoutExcerpts) / length(newcode$excerpts)
testthat::expect_equal(0.1, touchable_perc, tolerance = 0.01)
testthat::expect_equal(0.9, holdout_perc, tolerance = 0.01)
})
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment