Skip to content

Commit 133e5d9

Browse files
author
hornik
committed
Provide exact (conditional) one-sample inference with ties.
git-svn-id: https://svn.r-project.org/R/trunk@88753 00db46b3-68df-0310-9c12-caf00c1e9a41
1 parent b1a8872 commit 133e5d9

File tree

1 file changed

+85
-51
lines changed

1 file changed

+85
-51
lines changed

src/library/stats/R/wilcox.test.R

Lines changed: 85 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -67,25 +67,27 @@ function(x, y = NULL, alternative = c("two.sided", "less", "greater"),
6767
n <- as.double(length(x))
6868
if(is.null(exact))
6969
exact <- (n < 50)
70-
STAT <- .wilcox_test_one_stat(x, mu, n, digits.rank)
71-
TIES <- STAT$ties
72-
ZEROES <- STAT$zeroes
73-
if(exact && !TIES && !ZEROES) {
74-
METHOD <- sub("test", "exact test", METHOD, fixed = TRUE)
75-
PVAL <- .wilcox_test_one_pval_exact(STAT$statistic,
76-
n,
77-
alternative)
70+
ZERO <- any(x == mu)
71+
## Argh. Having exact zeroes (after subtracting y if paired)
72+
## and subtracting mu) is a problem. Wilcoxon suggested droping
73+
## them, Pratt suggested keeping them, see e.g.
74+
## <https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test#Zeros>.
75+
## Unclear how the conditional/permutation distribution works in
76+
## case of zeroes, so for not do not use an exact test then.
77+
if(exact && !ZERO) {
78+
METHOD <- sub("test", "exact test", METHOD, fixed = TRUE)
79+
STAT <- .wilcox_test_one_stat_exact(x, mu, n, digits.rank)
80+
PVAL <- .wilcox_test_one_pval_exact(STAT, n, alternative)
7881
if(conf.int)
7982
CINT <- .wilcox_test_one_cint_exact(x, n,
83+
STAT$z,
8084
alternative,
8185
conf.level)
82-
} else { ## not exact, maybe ties or zeroes
86+
} else { ## not exact, maybe zeroes
8387
if(correct)
8488
METHOD <- paste(METHOD, "with continuity correction")
85-
PVAL <- .wilcox_test_one_pval_asymp(STAT$statistic,
86-
STAT$mean,
87-
STAT$sd,
88-
alternative,
89+
STAT <- .wilcox_test_one_stat_asymp(x, mu, n, digits.rank)
90+
PVAL <- .wilcox_test_one_pval_asymp(STAT, alternative,
8991
correct)
9092
if(conf.int)
9193
CINT <- .wilcox_test_one_cint_asymp(x, n,
@@ -94,12 +96,7 @@ function(x, y = NULL, alternative = c("two.sided", "less", "greater"),
9496
correct,
9597
tol.root,
9698
digits.rank)
97-
if(exact && TIES) {
98-
warning("cannot compute exact p-value with ties")
99-
if(conf.int)
100-
warning("cannot compute exact confidence interval with ties")
101-
}
102-
if(exact && ZEROES) {
99+
if(exact && ZERO) {
103100
warning("cannot compute exact p-value with zeroes")
104101
if(conf.int)
105102
warning("cannot compute exact confidence interval with zeroes")
@@ -157,12 +154,28 @@ function(x, y = NULL, alternative = c("two.sided", "less", "greater"),
157154
RVAL
158155
}
159156

160-
.wilcox_test_one_stat <-
157+
.wilcox_test_one_stat_exact <-
158+
function(x, mu, n = length(x), digits.rank)
159+
{
160+
x <- x - mu
161+
## Should not happen ...
162+
ZERO <- any(x == 0)
163+
if(ZERO) {
164+
x <- x[x != 0]
165+
n <- length(x)
166+
}
167+
r <- rank(abs(if(is.finite(digits.rank)) signif(x, digits.rank) else x))
168+
TIES <- length(r) != length(unique(r))
169+
STATISTIC <- c("V" = sum(r[x > 0]))
170+
list(statistic = STATISTIC, z = if(TIES) r else NULL)
171+
}
172+
173+
.wilcox_test_one_stat_asymp <-
161174
function(x, mu, n = length(x), digits.rank)
162175
{
163176
x <- x - mu
164-
ZEROES <- any(x == 0)
165-
if(ZEROES) {
177+
ZERO <- any(x == 0)
178+
if(ZERO) {
166179
x <- x[x != 0]
167180
n <- length(x)
168181
}
@@ -173,54 +186,75 @@ function(x, mu, n = length(x), digits.rank)
173186
NTIES <- table(r)
174187
SIGMA <- sqrt(n * (n + 1) * (2 * n + 1) / 24
175188
- sum(NTIES^3 - NTIES) / 48)
176-
list(statistic = STATISTIC, mean = MEAN, sd = SIGMA,
177-
ties = TIES, zeroes = ZEROES)
189+
list(statistic = STATISTIC, ex = MEAN, sd = SIGMA,
190+
ties = TIES, zero = ZERO)
178191
}
179192

180193
.wilcox_test_one_pval_exact <-
181-
function(STATISTIC, n, alternative)
194+
function(STAT, n, alternative)
182195
{
196+
q <- STAT$statistic
197+
z <- STAT$z
183198
switch(alternative,
184199
"two.sided" = {
185-
p <- if(STATISTIC > (n * (n + 1) / 4))
186-
psignrank(STATISTIC - 1, n, lower.tail = FALSE)
187-
else psignrank(STATISTIC, n)
200+
p <- if(q > (n * (n + 1) / 4))
201+
.psignrank(q - 1, n, z, lower.tail = FALSE)
202+
else .psignrank(q, n, z)
188203
min(2 * p, 1)
189204
},
190-
"greater" = psignrank(STATISTIC - 1, n, lower.tail = FALSE),
191-
"less" = psignrank(STATISTIC, n))
205+
"greater" = .psignrank(q - 1, n, z, lower.tail = FALSE),
206+
"less" = .psignrank(q, n, z))
192207
}
193208

194209
.wilcox_test_one_cint_exact <-
195-
function(x, n, alternative, conf.level)
210+
function(x, n, z, alternative, conf.level)
196211
{
197212
## Exact confidence interval for the median in the
198213
## one-sample case. When used with paired values this
199214
## gives a confidence interval for mean(x) - mean(y).
200215
alpha <- 1 - conf.level
201216
diffs <- outer(x, x, `+`)
202217
diffs <- sort(diffs[!lower.tri(diffs)]) / 2
218+
w <- if(is.null(z))
219+
(n * (n + 1) / 2) : 1L
220+
else {
221+
vapply(diffs,
222+
\(d) { xx <- x - d; sum(rank(abs(xx))[xx > 0]) },
223+
0)
224+
}
203225
CONF.INT <-
204226
switch(alternative,
205227
"two.sided" = {
206-
qu <- qsignrank(alpha / 2, n)
228+
qu <- .qsignrank(alpha / 2, n, z)
229+
ql <- n * (n + 1) / 2 - qu
230+
lci <- if(qu <= min(w)) max(diffs)
231+
else min(diffs[w <= qu])
232+
uci <- if(ql >= max(w)) min(diffs)
233+
else max(diffs[w > ql])
234+
c(uci, lci)
207235
if(qu == 0) qu <- 1
208-
ql <- n*(n+1)/2 - qu
209-
achieved.alpha <- 2*psignrank(trunc(qu)-1,n)
210-
c(diffs[qu], diffs[ql+1])
236+
achieved.alpha <-
237+
2 * .psignrank(trunc(qu) - 1, n, z)
238+
c(uci, lci)
211239
},
212240
"greater" = {
213-
qu <- qsignrank(alpha, n)
241+
qu <- .qsignrank(alpha, n, z)
242+
ql <- n * (n + 1) / 2 - qu
243+
uci <- if(ql >= max(w)) min(diffs)
244+
else max(diffs[w > ql])
214245
if(qu == 0) qu <- 1
215-
achieved.alpha <- psignrank(trunc(qu)-1,n)
216-
c(diffs[qu], +Inf)
246+
achieved.alpha <-
247+
.psignrank(trunc(qu) - 1, n, z)
248+
c(uci, +Inf)
217249
},
218250
"less" = {
219-
qu <- qsignrank(alpha, n)
251+
qu <- .qsignrank(alpha, n, z)
252+
lci <- if(qu <= min(w)) max(diffs)
253+
else min(diffs[w <= qu])
220254
if(qu == 0) qu <- 1
221-
ql <- n*(n+1)/2 - qu
222-
achieved.alpha <- psignrank(trunc(qu)-1,n)
223-
c(-Inf, diffs[ql+1])
255+
achieved.alpha <-
256+
.psignrank(trunc(qu) - 1, n, z)
257+
c(-Inf, lci)
224258
})
225259
if(achieved.alpha - alpha > alpha/2){
226260
warning("requested conf.level not achievable")
@@ -232,17 +266,17 @@ function(x, n, alternative, conf.level)
232266
}
233267

234268
.wilcox_test_one_pval_asymp <-
235-
function(STATISTIC, MEAN, SIGMA, alternative, correct)
269+
function(STAT, alternative, correct)
236270
{
237-
z <- STATISTIC - MEAN
271+
z <- STAT$statistic - STAT$ex
238272
CORRECTION <- if(correct)
239273
switch(alternative,
240274
"two.sided" = sign(z) * 0.5,
241275
"greater" = 0.5,
242276
"less" = -0.5)
243277
else
244278
0
245-
z <- (z - CORRECTION) / SIGMA
279+
z <- (z - CORRECTION) / STAT$sd
246280
switch(alternative,
247281
"less" = pnorm(z),
248282
"greater" = pnorm(z, lower.tail = FALSE),
@@ -402,20 +436,20 @@ function(x, y, mu, n.x = length(x), n.y = length(y), digits.rank)
402436
.wilcox_test_two_pval_exact <-
403437
function(STAT, n.x, n.y, alternative)
404438
{
405-
u <- STAT$statistic
439+
q <- STAT$statistic
406440
z <- STAT$z
407441
switch(alternative,
408442
"two.sided" = {
409-
p <- if(u > (n.x * n.y / 2))
410-
.pwilcox(u - 1, n.x, n.y, z, lower.tail = FALSE)
443+
p <- if(q > (n.x * n.y / 2))
444+
.pwilcox(q - 1, n.x, n.y, z, lower.tail = FALSE)
411445
else
412-
.pwilcox(u, n.x, n.y, z)
446+
.pwilcox(q, n.x, n.y, z)
413447
min(2 * p, 1)
414448
},
415449
"greater" = {
416-
.pwilcox(u - 1, n.x, n.y, z, lower.tail = FALSE)
450+
.pwilcox(q - 1, n.x, n.y, z, lower.tail = FALSE)
417451
},
418-
"less" = .pwilcox(u, n.x, n.y, z))
452+
"less" = .pwilcox(q, n.x, n.y, z))
419453
}
420454

421455
.wilcox_test_two_cint_exact <-

0 commit comments

Comments
 (0)