data.table work diary: Nov 11 - Nov 15

Nov 11, ‘24:

1) Created test cases for the mutants of some new/non-standard C files (ones I haven’t looked at before, unlike e.g. forder.c and rbindlist.c): (7 hours)

// ijoin.c 607
// original:
INTEGER(f1__)[thislen] = i+1;
// mutant:
INTEGER(f1__)[thislen] = i+0;

# Inner join on (a, b): only the dt2 rows that have a partner in dt1 should
# come back.
# NOTE(review): confirm that a plain on= join actually reaches the mutated
# line in ijoin.c; this test only pins the R-level join result.
test_that("ijoin correctly assigns f1__ indices", {
  dt1 <- data.table(a = 1:3, b = 4:6)
  dt2 <- data.table(a = c(2, 3, 4), b = c(5, 6, 7))
  # nomatch = 0L drops the unmatched (4, 7) row of dt2. Without it the join
  # keeps all three rows of dt2 and the length-2 expectations below fail.
  result <- dt1[dt2, on = .(a, b), nomatch = 0L]
  expect_equal(result$a, c(2, 3))
  expect_equal(result$b, c(5, 6)) # Correct indices assigned(?)
})

// ijoin.c 495
// original:
if (k == to[i]) {
// mutant:
if (k > to[i]) {

# Inner join on a single key: ids 2 and 3 are the only values present in both
# tables, so exactly those two rows should be returned.
test_that("ijoin handles edge case with k == to[i]", {
  left <- data.table(id = c(1, 2, 3))
  right <- data.table(id = c(2, 3, 4))
  joined <- left[right, on = .(id), nomatch = 0L]
  expect_equal(nrow(joined), 2) # Only rows matching exactly.
  ids_match <- joined$id == c(2, 3)
  expect_true(all(ids_match))
})

// utils.c 255
// original:
if (ALTREP(thiscol) || TRUELENGTH(thiscol)<0) {
// mutant:
if (ALTREP(thiscol) || TRUELENGTH(thiscol)!=0) {

# Intended to cover the ALTREP / TRUELENGTH condition quoted from utils.c.
# NOTE(review): attr<- stores an ordinary R attribute named "truelength"; it
# does not appear to change the internal TRUELENGTH field that the C
# condition reads, so this test may not distinguish the mutant -- confirm.
test_that("ALTREP and TRUELENGTH handling in utils", {
  dt <- data.table(a = 1:5)
  attr(dt$a, "truelength") <- -1
  result <- nrow(dt)
  expect_equal(result, 5) # Row count is unaffected by the attribute set above.
})

// utils.c 413
// original:
return ScalarInteger(i+1);
// mutant:
return ScalarInteger(i-1);

# Looks up the value 5 in a 1:10 column and checks its 1-based position.
# NOTE(review): which() is base R, so this may never reach the mutated line
# in utils.c -- confirm which data.table entry point calls it.
test_that("Internal function correctly adjusts indices.", {
  tbl <- data.table(a = 1:10)
  position <- which(tbl$a == 5)
  expect_equal(position, 5)
})

2) Minor changes to data.table.threads. (1 hour)

Nov 12, ‘24:

1) More mutation testing. (6 hours)

// ijoin.c 182
// original:
if (length(tt) && length(vv) >= count[i])
// mutant:
if (length(tt) || length(vv) >= count[i])

# Join a five-row table against the single key 3 and pin the result to a
# literal expectation (one row, x == 3). Comparing the join against a second
# run of the same join can never fail, so it cannot detect the mutant.
dt1 <- data.table(x = 1:5)
dt2 <- data.table(x = 3)
res <- dt1[dt2, on = "x"]
# Incorrect for logical OR (expecting mutant to return extra rows or incomplete matches)
# The key column is compared as a plain vector so that an integer/double
# storage difference between dt1$x and dt2$x does not matter.
stopifnot(nrow(res) == 1L, isTRUE(all.equal(res$x, 3)))

// uniqlist.c 158
// original:
if (len > 0) INTEGER(ans)[len - 1] = INTEGER(n)[0] - INTEGER(x)[len - 1] + 1;
// mutant:
if (len >= 0) INTEGER(ans)[len - 1] = INTEGER(n)[0] - INTEGER(x)[len - 1] + 1;

# Rows 2 and 4 repeat the row directly above them; every other row is new.
# The expectation is written out literally: deriving it from duplicated(dt)
# itself would make the check pass for any implementation.
dt <- data.table(x = c(1, 1, 2, 2, 3), y = c("a", "a", "b", "b", "c"))
expected <- c(FALSE, TRUE, FALSE, TRUE, FALSE)
# Diary guess at the mutant's output (incorrect for negative-length groups):
# c(FALSE, FALSE, TRUE, TRUE, TRUE)
result <- duplicated(dt)
stopifnot(identical(result, expected))

// ijoin.c 79
// original:
if (getNumericRounding_C() < 0 || inherits(v, "integer64")) { /*...*/ }
// mutant:
if (getNumericRounding_C() == 0 || inherits(v, "integer64")) { /*...*/ }

# Join on a double key with numeric rounding set to 0: both keys of dt2 occur
# in dt1, so the join should return exactly those two values.
dt2 <- data.table(x = c(1.1, 2.2))
dt1 <- data.table(x = c(1.1, 2.2, 3.3))
expected <- data.table(x = c(1.1, 2.2))
setNumericRounding(0) # Default, matching numeric keys later
result <- dt1[dt2, on = "x"]
stopifnot(all.equal(result, expected))

2) Checked and tried to help with #6612 and #6613. (3 hours)

Nov 13, ‘24:

1) Testing more of the interesting mutants. (6 hours)

// fsort.c 165
// original:
int MSBNbits = maxBit > 15 ? 16 : maxBit + 1;
// mutant:
int MSBNbits = maxBit > 15 ? 16 : maxBit + -1;
// fsort.c 223
// original:
int fromBit = toBit > 7 ? toBit - 7 : 0;
// mutant:
int fromBit = toBit > 7 ? toBit - 7 : (0 + 1);
// frolladaptive.c 219
// original:
bool truehasna = hasna > 0;
// mutant:
bool truehasna = hasna > 1;
// ijoin.c 275
// original:
++wlen; ++j; ++m;
// mutant:
++wlen; --j; ++m;

# The stopifnot() calls below are the lines one should test before/after making changes to the C files to the mutant version:
# --- fsort.c 165 / 223 ---
# Inputs are kept as double: fsort() documents its fast path for type double
# only. NOTE(review): integer input is presumably handled by a fallback and
# would then never reach fsort.c -- confirm against the fsort() source.
x <- c(65535, 0, 32768, 1)
res <- fsort(x)
# Base is.unsorted() is used so the check does not rely on a non-exported
# helper.
stopifnot(!is.unsorted(res))
y <- c(255, 1, 128, 64)
res <- fsort(y)
# Check the sorted result, not the (deliberately unsorted) input y.
stopifnot(!is.unsorted(res))

# --- frolladaptive.c 219 ---
# With adaptive = TRUE the window is given per observation, so n has one
# entry per element of z. na.rm = TRUE is needed for the NA at position 3 to
# be skipped instead of propagated into the result.
z <- c(1, 2, NA, 4, 5)
res <- frollmean(z, n = c(1L, 2L, 3L, 3L, 3L), adaptive = TRUE, na.rm = TRUE)
# Window means: {1}, {1,2}, {1,2,NA}, {2,NA,4}, {NA,4,5} with NAs removed.
stopifnot(!anyNA(res), isTRUE(all.equal(res, c(1, 1.5, 1.5, 3, 4.5))))

# --- ijoin.c 275 ---
# A full outer merge should yield one row per distinct id across both tables.
x <- data.table(id = c(1, 2, 3), value = c("a", "b", "c"))
y <- data.table(id = c(3, 4, 5), value = c("c", "d", "e"))
z <- data.table(x = c(1, 2, 2, 3, 4, 4))
res <- merge(x, y, by = "id", all = TRUE)
stopifnot(nrow(res) == length(unique(c(x$id, y$id))))
# "maxgrpn" is the size of the largest group, not the number of groups; the
# group count is the number of group start positions.
grp_starts <- attr(forderv(z, retGrp = TRUE), "starts")
grp_count <- length(grp_starts)
expected_grp_count <- length(unique(z$x))
stopifnot(grp_count == expected_grp_count)

2) Revised a few parts in the research article about atime. (1 hour)

Nov 14, ‘24:

1) Revised more sections in the article. (6 hours)

2) NAU Zoom meetings. (2 hours)

Nov 15, ‘24:

1) Created a PR to use hyperlinks instead of vignette() calls for readability and did the same for vignette titles without links for consistency (#6617). (6 hours)

2) Revised a few statements/paragraphs in the atime article. (3 hours)