Skip to content

Commit 75871a6

Browse files
author
hornik
committed
Provide exact (conditional) two-sample inference with ties.
git-svn-id: https://svn.r-project.org/R/trunk@88748 00db46b3-68df-0310-9c12-caf00c1e9a41
1 parent 068c01e commit 75871a6

File tree

1 file changed

+64
-41
lines changed

1 file changed

+64
-41
lines changed

src/library/stats/R/wilcox.test.R

Lines changed: 64 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -114,25 +114,24 @@ function(x, y = NULL, alternative = c("two.sided", "less", "greater"),
114114
n.y <- as.double(length(y))
115115
if(is.null(exact))
116116
exact <- (n.x < 50) && (n.y < 50)
117-
STAT <- .wilcox_test_two_stat(x, y, mu, n.x, n.y, digits.rank)
118-
TIES <- STAT$ties
119-
if(exact && !TIES) {
117+
if(exact) {
120118
METHOD <- sub("test", "exact test", METHOD, fixed = TRUE)
121-
PVAL <- .wilcox_test_two_pval_exact(STAT$statistic,
122-
n.x, n.y,
119+
STAT <- .wilcox_test_two_stat_exact(x, y, mu, n.x, n.y,
120+
digits.rank)
121+
PVAL <- .wilcox_test_two_pval_exact(STAT, n.x, n.y,
123122
alternative)
124123
if(conf.int)
125124
CINT <- .wilcox_test_two_cint_exact(x, y, n.x, n.y,
125+
STAT$z,
126126
alternative,
127127
conf.level)
128128
}
129-
else { ## not exact, maybe ties or zeroes
129+
else { ## not exact
130130
if(correct)
131131
METHOD <- paste(METHOD, "with continuity correction")
132-
PVAL <- .wilcox_test_two_pval_asymp(STAT$statistic,
133-
STAT$mean,
134-
STAT$sd,
135-
alternative,
132+
STAT <- .wilcox_test_two_stat_asymp(x, y, mu, n.x, n.y,
133+
digits.rank)
134+
PVAL <- .wilcox_test_two_pval_asymp(STAT, alternative,
136135
correct)
137136
if(conf.int)
138137
CINT <- .wilcox_test_two_cint_asymp(x, y, n.x, n.y,
@@ -141,11 +140,6 @@ function(x, y = NULL, alternative = c("two.sided", "less", "greater"),
141140
correct,
142141
tol.root,
143142
digits.rank)
144-
if(exact && TIES) {
145-
warning("cannot compute exact p-value with ties")
146-
if(conf.int)
147-
warning("cannot compute exact confidence intervals with ties")
148-
}
149143
}
150144
}
151145

@@ -378,8 +372,18 @@ function(x, n, alternative, conf.level, correct,
378372
} # regular (Wmumin, Wmumax)
379373
list(conf.int = CONF.INT, estimate = ESTIMATE)
380374
}
375+
376+
.wilcox_test_two_stat_exact <-
377+
function(x, y, mu, n.x = length(x), n.y = length(y), digits.rank)
378+
{
379+
r <- c(x - mu, y)
380+
r <- rank(if(is.finite(digits.rank)) signif(r, digits.rank) else r)
381+
TIES <- (length(r) != length(unique(r)))
382+
STATISTIC <- c("W" = sum(r[seq_along(x)]) - n.x * (n.x + 1) / 2)
383+
list(statistic = STATISTIC, z = if(TIES) r else NULL)
384+
}
381385

382-
.wilcox_test_two_stat <-
386+
.wilcox_test_two_stat_asymp <-
383387
function(x, y, mu, n.x = length(x), n.y = length(y), digits.rank)
384388
{
385389
r <- c(x - mu, y)
@@ -392,78 +396,97 @@ function(x, y, mu, n.x = length(x), n.y = length(y), digits.rank)
392396
((n.x + n.y + 1)
393397
- sum(NTIES^3 - NTIES)
394398
/ ((n.x + n.y) * (n.x + n.y - 1))))
395-
list(statistic = STATISTIC, mean = MEAN, sd = SIGMA, ties = TIES)
399+
list(statistic = STATISTIC, ex = MEAN, sd = SIGMA, ties = TIES)
396400
}
397401

398402
.wilcox_test_two_pval_exact <-
399-
function(STATISTIC, n.x, n.y, alternative)
403+
function(STAT, n.x, n.y, alternative)
400404
{
405+
u <- STAT$statistic
406+
z <- STAT$z
401407
switch(alternative,
402408
"two.sided" = {
403-
p <- if(STATISTIC > (n.x * n.y / 2))
404-
pwilcox(STATISTIC - 1, n.x, n.y, lower.tail = FALSE)
409+
p <- if(u > (n.x * n.y / 2))
410+
.pwilcox(u - 1, n.x, n.y, z, lower.tail = FALSE)
405411
else
406-
pwilcox(STATISTIC, n.x, n.y)
412+
.pwilcox(u, n.x, n.y, z)
407413
min(2 * p, 1)
408414
},
409415
"greater" = {
410-
pwilcox(STATISTIC - 1, n.x, n.y, lower.tail = FALSE)
416+
.pwilcox(u - 1, n.x, n.y, z, lower.tail = FALSE)
411417
},
412-
"less" = pwilcox(STATISTIC, n.x, n.y))
418+
"less" = .pwilcox(u, n.x, n.y, z))
413419
}
414420

415421
.wilcox_test_two_cint_exact <-
416-
function(x, y, n.x, n.y, alternative, conf.level)
422+
function(x, y, n.x, n.y, z, alternative, conf.level)
417423
{
418424
## Exact confidence interval for the location parameter
419425
## mean(x) - mean(y) in the two-sample case (cf. the
420426
## one-sample case).
421427
alpha <- 1 - conf.level
422428
diffs <- sort(outer(x, y, `-`))
429+
w <- if(is.null(z))
430+
(n.x * n.y) : 1L
431+
else {
432+
i <- seq_along(x)
433+
m <- n.x * (n.x + 1) / 2
434+
vapply(diffs, \(d) sum(rank(c(x - d, y))[i]) - m, 0)
435+
}
423436
CONF.INT <-
424437
switch(alternative,
425438
"two.sided" = {
426-
qu <- qwilcox(alpha/2, n.x, n.y)
439+
qu <- .qwilcox(alpha/2, n.x, n.y, z)
440+
ql <- n.x * n.y - qu
441+
lci <- if(qu <= min(w)) max(diffs)
442+
else min(diffs[w <= qu])
443+
uci <- if(ql >= max(w)) min(diffs)
444+
else max(diffs[w > ql])
427445
if(qu == 0) qu <- 1
428-
ql <- n.x*n.y - qu
429-
achieved.alpha <- 2*pwilcox(trunc(qu)-1,n.x,n.y)
430-
c(diffs[qu], diffs[ql + 1])
446+
achieved.alpha <-
447+
2 * .pwilcox(trunc(qu) - 1, n.x, n.y, z)
448+
c(uci, lci)
431449
},
432450
"greater" = {
433-
qu <- qwilcox(alpha, n.x, n.y)
451+
qu <- .qwilcox(alpha, n.x, n.y, z)
452+
ql <- n.x * n.y - qu
453+
uci <- if(ql >= max(w)) min(diffs)
454+
else max(diffs[w > ql])
434455
if(qu == 0) qu <- 1
435-
achieved.alpha <- pwilcox(trunc(qu)-1,n.x,n.y)
436-
c(diffs[qu], +Inf)
456+
achieved.alpha <-
457+
.pwilcox(trunc(qu) - 1, n.x, n.y, z)
458+
c(uci, +Inf)
437459
},
438460
"less" = {
439-
qu <- qwilcox(alpha, n.x, n.y)
461+
qu <- .qwilcox(alpha, n.x, n.y, z)
462+
lci <- if(qu <= min(w)) max(diffs)
463+
else min(diffs[w <= qu])
440464
if(qu == 0) qu <- 1
441-
ql <- n.x*n.y - qu
442-
achieved.alpha <- pwilcox(trunc(qu)-1,n.x,n.y)
443-
c(-Inf, diffs[ql + 1])
465+
achieved.alpha <-
466+
.pwilcox(trunc(qu) - 1, n.x, n.y, z)
467+
c(-Inf, lci)
444468
})
445-
if(achieved.alpha-alpha > alpha/2) {
469+
if(achieved.alpha - alpha > alpha / 2) {
446470
warning("Requested conf.level not achievable")
447471
conf.level <- 1 - achieved.alpha
448472
}
449473
attr(CONF.INT, "conf.level") <- conf.level
450474
ESTIMATE <- c("difference in location" = median(diffs))
451475
list(conf.int = CONF.INT, estimate = ESTIMATE)
452-
}
476+
}
453477

454-
455478
.wilcox_test_two_pval_asymp <-
456-
function(STATISTIC, MEAN, SIGMA, alternative, correct)
479+
function(STAT, alternative, correct)
457480
{
458-
z <- STATISTIC - MEAN
481+
z <- STAT$statistic - STAT$ex
459482
CORRECTION <- if(correct)
460483
switch(alternative,
461484
"two.sided" = sign(z) * 0.5,
462485
"greater" = 0.5,
463486
"less" = -0.5)
464487
else
465488
0
466-
z <- (z - CORRECTION) / SIGMA
489+
z <- (z - CORRECTION) / STAT$sd
467490
switch(alternative,
468491
"less" = pnorm(z),
469492
"greater" = pnorm(z, lower.tail = FALSE),

0 commit comments

Comments
 (0)