- Nov 4, ‘24:
1) Created tests for more mutants. (7 hours)
// fread.c 803
// original:
if (hasNullByte) goto done;
// mutant:
if (!hasNullByte) goto done;
# R string literals cannot hold an embedded NUL ("\x00" is rejected by the
# parser), so the NUL byte is written to a temporary file as a raw byte and
# fread() reads the file instead.
nul_file <- tempfile(fileext = ".csv")
writeBin(c(charToRaw("a,b\n1,2\n3,"), as.raw(0L)), nul_file)
result <- tryCatch(fread(nul_file), error = function(e) e)
unlink(nul_file)
# A parse failure would surface here as a condition object, not a data.table.
stopifnot(inherits(result, "data.table"))
// fread.c 890
// original:
if (nrow > 0) return FALSE;
// mutant:
if (nrow >= 0) return FALSE;
# Reading an empty input with fill = TRUE is expected to give zero rows.
empty_dt <- fread("", fill = TRUE)
stopifnot(nrow(empty_dt) == 0)
// fwrite.c 150
// original:
if (sep == NULL) sep = ",";
// mutant:
if (sep != NULL) sep = ",";
# Round-trip a table through fwrite()/fread() with a non-default separator.
# A temporary file is used (and removed) so the test leaves nothing behind in
# the working directory.
dt <- data.table(a = 1:3, b = 4:6)
out_file <- tempfile(fileext = ".csv")
fwrite(dt, out_file, sep = "|")
result <- fread(out_file, sep = "|")
unlink(out_file)
stopifnot(identical(result, dt))
// reorder.c 101
// original:
while (i < j && v[i] <= pivot) i++;
// mutant:
while (i < j && v[i] < pivot) i++;
# Applying the ordering permutation must reproduce base sort().
values <- data.table(x = c(3, 2, 5, 1, 4))
perm <- forderv(values$x)
stopifnot(all(values$x[perm] == sort(values$x)))
- Nov 5, ‘24:
1) Tested more mutants. (9 hours)
// forder.c 167
// original:
*out_min = min ^ 0x80000000u;
// mutant:
/**out_min = min ^ 0x80000000u;*/
# Key on a column holding the 32-bit signed integer limits (written without
# the L suffix, so they are stored as doubles) and check the resulting order.
limits <- c(-2147483648, 2147483647)
dt <- data.table(a = limits)
setkey(dt, a)
expect_equal(dt$a, limits)
// fread.c 712
// original:
if (*ch!=dec && *ch!='e' && *ch!='E') goto fail;
// mutant:
if (*ch!=dec && *ch!='e' && *ch<='E') goto fail;
# Both 'E' and 'e' exponent markers should be parsed as numeric values.
parsed <- fread("a\n1.23E+4\n5.67e-3")
expect_equal(parsed$a, c(12300, 0.00567))
// forder.c 335
// original:
if (ustr3[i] == ustr3[i-1]) continue;
// mutant:
if (ustr3[i] == ustr3[i%1]) continue;
# Key a character column that contains repeated values, then check that the
# distinct values come out in sorted order.
letters_dt <- data.table(a = c("a", "a", "b", "b", "c"))
setkey(letters_dt, a)
expect_equal(unique(letters_dt$a), c("a", "b", "c"))
// fread.c 572
// original:
if (ch==eof && quoteRule!=2) { target->off--; target->len++; }
// mutant:
if ((1==1) && quoteRule!=2) { target->off--; target->len++; }
# Quoted fields that contain the separator should be kept as single values.
quoted <- fread('"a,b",c\n"d,e",f')
expect_equal(quoted[[1]], c("a,b", "d,e"))
// fread.c 784
// original:
int_fast8_t extra = e < 0 ? e + 300 : e - 300;
// mutant:
int_fast8_t extra = e < 0 ? e + 300 : e - 0;
# Very small and very large exponents should still parse to finite numbers.
extreme <- fread("a\n1.23E-300\n4.56E+300")
expect_true(all(is.finite(extreme$a)))
// rbindlist.c 160
// original:
while (wi && dupLink[w]>0) { w=dupLink[w]; --wi; }
// mutant:
while (wi && dupLink[w]>-1) { w=dupLink[w]; --wi; }
dt1 <- data.table(a = 1:3)
dt2 <- data.table(a = 3:5)
result <- rbindlist(list(dt1, dt2), use.names = TRUE, fill = TRUE)
# rbindlist() stacks the inputs row by row and does not de-duplicate, so the
# value 3 (present in both tables) must appear twice in the result.
expect_equal(result$a, c(1L, 2L, 3L, 3L, 4L, 5L))
- Nov 6, ‘24:
1) Continuing the mutation testing work for the remaining cases. (8 hours)
// forder.c 917
// original:
if (radix == nradix && !retgrp) {
// mutant:
if (radix + 1 == nradix && !retgrp) {
# Ordering a column with many ties must give a non-decreasing sequence.
# NOTE(review): retGrp is not an argument of order(); it looks like a
# forderv() option. Confirm this call actually reaches the retgrp branch.
samples <- data.table(values = sample(1:10, 100, replace = TRUE))
ordered <- samples[order(values, retGrp = TRUE)]
expect_true(all(diff(ordered$values) >= 0))
// fread.c 355
// original:
if (ch == '\0') return eof;
// mutant:
if (ch == eof) return eof;
# Read a small CSV from disk and check that both data rows are returned.
# The path variable is named consistently: the earlier version assigned
# `filePath` but then used `file_path`, which is undefined here.
file_path <- tempfile()
writeLines(c("x,y", "4,2", "9,6"), file_path)
dt <- fread(file_path)
expect_equal(nrow(dt), 2)
unlink(file_path)
// fmelt.c 517
// original:
int thislen = 0;
// mutant:
int thislen = (0+1);
# Melting a one-row, one-column table should give exactly one row.
single <- data.table(W = 1)
long <- melt(single, measure.vars = "W")
expect_equal(nrow(long), 1)
// fread.c 572
// original:
if (ch == eof && quoteRule != 2) { target->off--; target->len++; }
// mutant:
if (ch == eof) { target->off--; target->len++; }
# The last field opens a quote that is never closed before end of file.
# NOTE(review): quoteRule does not appear among fread()'s documented
# arguments. Confirm that this call is accepted as written.
csv_path <- tempfile()
writeLines(c('a,b', '"1,2'), csv_path)
dt <- fread(csv_path, quoteRule = 1)
expect_true(nrow(dt) == 1 || is.na(dt$a))
unlink(csv_path)
// fread.c 365
// original:
while (*ch != '\n' && *ch != '\r' && *ch != '\0') ch++;
// mutant:
while (*ch != '\n' && *ch != '\r' && (*ch != '\0' || ch < eof)) ch++;
# Lines ending in \r, \n and \r\n within the same file.
# NOTE(review): writeLines() appends its own "\n" after every element, so the
# file also contains blank lines. Confirm the expected row count.
mixed_eol_path <- tempfile()
writeLines(c("a,b\r", "1,2\n", "3,4\r\n"), mixed_eol_path)
dt <- fread(mixed_eol_path)
expect_equal(nrow(dt), 3)
unlink(mixed_eol_path)
- Nov 7, ‘24:
1) Tried to reproduce and resolve the potential issue with missing branch references (partially qualified names, i.e. branches without the author name prefixed) using another GitHub account, since the issue does not reproduce on mine. (7 hours)
Modified part of my workflow for affected forks: added a fallback to the default branch behaviour (retrieving branch references with the username prefix) when GITHUB_HEAD_REF is missing or empty:
- name: Retrieve branch references
  # Try to check out the pull request's head branch; fall back to the base
  # branch when GITHUB_HEAD_REF is empty or the branch cannot be fetched.
  run: |
    if [ -n "${GITHUB_HEAD_REF}" ]; then
      # A failed fetch is reported but not fatal; the check below decides what to switch to.
      git fetch origin "${GITHUB_HEAD_REF}:${GITHUB_HEAD_REF}" || echo "Failed to fetch branch ${GITHUB_HEAD_REF} from fork"
      if git show-ref --verify --quiet "refs/heads/${GITHUB_HEAD_REF}"; then
        git switch "${GITHUB_HEAD_REF}"
      else
        echo "Branch ${GITHUB_HEAD_REF} not found, switching to ${GITHUB_BASE_REF} instead."
        git switch "${GITHUB_BASE_REF}"
      fi
    else
      echo "GITHUB_HEAD_REF is empty, switching to ${GITHUB_BASE_REF}"
      git switch "${GITHUB_BASE_REF}"
    fi
  shell: bash
- Nov 8, ‘24:
1) Continued my attempts to make a universal fix for the branch names, but it seems like the issue doesn’t exist anymore. (3 hours)
2) Tested the proposed solution to #6607 (wrote tests after initializing ans as a numeric, i.e. making it 0.0 instead of 0L, since larger values must be stored as double-precision floating-point numbers to avoid overflow), ensuring it works. (3 hours)
3) Made the suggested changes for data.table.threads v1.0.1 on CRAN (expected to be on the platform by Monday) and updated the README on GitHub. (3 hours)