Skip to content

Commit 0b8fecc

Browse files
author
hornik
committed
Tweaks and FIXMEs.
git-svn-id: https://svn.r-project.org/R/trunk@88760 00db46b3-68df-0310-9c12-caf00c1e9a41
1 parent 6c5f0e3 commit 0b8fecc

File tree

1 file changed

+62
-22
lines changed

1 file changed

+62
-22
lines changed

src/library/stats/R/wilcox.test.R

Lines changed: 62 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ function(x, y = NULL, alternative = c("two.sided", "less", "greater"),
100100
CINT <- .wilcox_test_one_cint_asymp(x, n,
101101
alternative,
102102
conf.level,
103-
correct,
103+
correct >= 0,
104104
tol.root,
105105
digits.rank)
106106
if(exact && ZERO) {
@@ -141,7 +141,7 @@ function(x, y = NULL, alternative = c("two.sided", "less", "greater"),
141141
CINT <- .wilcox_test_two_cint_asymp(x, y, n.x, n.y,
142142
alternative,
143143
conf.level,
144-
correct,
144+
correct >= 0,
145145
tol.root,
146146
digits.rank)
147147
}
@@ -204,12 +204,14 @@ function(STAT, n, alternative)
204204
z <- STAT$z
205205
switch(alternative,
206206
"two.sided" = {
207+
## FIXME: Is the conditional distribution really
208+
## symmetric about its mean?
207209
p <- if(q > (n * (n + 1) / 4))
208-
.psignrank(q - 1, n, z, lower.tail = FALSE)
210+
.psignrank(q - 1/4, n, z, lower.tail = FALSE)
209211
else .psignrank(q, n, z)
210212
min(2 * p, 1)
211213
},
212-
"greater" = .psignrank(q - 1, n, z, lower.tail = FALSE),
214+
"greater" = .psignrank(q - 1/4, n, z, lower.tail = FALSE),
213215
"less" = .psignrank(q, n, z))
214216
}
215217

@@ -222,6 +224,7 @@ function(x, n, z, alternative, conf.level)
222224
alpha <- 1 - conf.level
223225
diffs <- outer(x, x, `+`)
224226
diffs <- sort(diffs[!lower.tri(diffs)]) / 2
227+
## Of course the 'diffs' are really the Walsh averages.
225228
w <- if(is.null(z))
226229
(n * (n + 1) / 2) : 1L
227230
else {
@@ -232,42 +235,46 @@ function(x, n, z, alternative, conf.level)
232235
CONF.INT <-
233236
switch(alternative,
234237
"two.sided" = {
238+
## FIXME: Is the conditional distribution really
239+
## symmetric about its mean?
235240
qu <- .qsignrank(alpha / 2, n, z)
236241
ql <- n * (n + 1) / 2 - qu
237242
lci <- if(qu <= min(w)) max(diffs)
238243
else min(diffs[w <= qu])
239244
uci <- if(ql >= max(w)) min(diffs)
240245
else max(diffs[w > ql])
241246
c(uci, lci)
242-
if(qu == 0) qu <- 1
243247
achieved.alpha <-
244-
2 * .psignrank(trunc(qu) - 1, n, z)
248+
2 * .psignrank(qu - 1/4, n, z)
245249
c(uci, lci)
246250
},
247251
"greater" = {
252+
## FIXME: Is the conditional distribution really
253+
## symmetric about its mean?
248254
qu <- .qsignrank(alpha, n, z)
249255
ql <- n * (n + 1) / 2 - qu
250256
uci <- if(ql >= max(w)) min(diffs)
251257
else max(diffs[w > ql])
252-
if(qu == 0) qu <- 1
253258
achieved.alpha <-
254-
.psignrank(trunc(qu) - 1, n, z)
259+
.psignrank(qu - 1/4, n, z)
255260
c(uci, +Inf)
256261
},
257262
"less" = {
258263
qu <- .qsignrank(alpha, n, z)
259264
lci <- if(qu <= min(w)) max(diffs)
260265
else min(diffs[w <= qu])
261-
if(qu == 0) qu <- 1
262266
achieved.alpha <-
263-
.psignrank(trunc(qu) - 1, n, z)
267+
.psignrank(qu - 1/4, n, z)
264268
c(-Inf, lci)
265269
})
266270
if(achieved.alpha - alpha > alpha/2){
267271
warning("requested conf.level not achievable")
268272
conf.level <- 1 - signif(achieved.alpha, 2)
269273
}
270274
attr(CONF.INT, "conf.level") <- conf.level
275+
## FIXME: This is the Hodges-Lehmann estimate and not what is
276+
## suggested in Bauer (1972) (as in \CRANpkg{coin}: is this really
277+
## what we want?
271278
ESTIMATE <- c("(pseudo)median" = median(diffs))
272279
list(conf.int = CONF.INT, estimate = ESTIMATE)
273280
}
@@ -360,6 +367,8 @@ function(x, n, alternative, conf.level, correct,
360367
Wmumax <- if(!is.finite(Wmumin)) NA else W(mumax) # if(): warn only once
361368
}
362369
if(n == 0 || !is.finite(Wmumax)) { # incl. "all zero / ties" warning above
370+
## FIXME: in the one-sides cases this gives (-Inf, NaN) and
371+
## (NaN, Inf): is this really what we want?
363372
CONF.INT <-
364373
structure(c(if(alternative == "less" ) -Inf else NaN,
365374
if(alternative == "greater") +Inf else NaN),
@@ -434,6 +443,8 @@ function(x, n, alternative, conf.level, correct,
434443
})
435444
attr(CONF.INT, "conf.level") <- conf.level
436445
correct <- FALSE # for W(): no continuity correction for estimate
446+
## FIXME: Perhaps instead simply give the Hodges-Lehmann
447+
## estimate? In particular as we now use 'correct = FALSE'.
437448
ESTIMATE <- c("(pseudo)median" =
438449
uniroot(W, lower = mumin, upper = mumax,
439450
tol = tol.root)$root)
@@ -474,14 +485,16 @@ function(STAT, n.x, n.y, alternative)
474485
z <- STAT$z
475486
switch(alternative,
476487
"two.sided" = {
488+
## FIXME: Is the conditional distribution really
489+
## symmetric about its mean?
477490
p <- if(q > (n.x * n.y / 2))
478-
.pwilcox(q - 1, n.x, n.y, z, lower.tail = FALSE)
491+
.pwilcox(q - 1/4, n.x, n.y, z, lower.tail = FALSE)
479492
else
480493
.pwilcox(q, n.x, n.y, z)
481494
min(2 * p, 1)
482495
},
483496
"greater" = {
484-
.pwilcox(q - 1, n.x, n.y, z, lower.tail = FALSE)
497+
.pwilcox(q - 1/4, n.x, n.y, z, lower.tail = FALSE)
485498
},
486499
"less" = .pwilcox(q, n.x, n.y, z))
487500
}
@@ -504,25 +517,28 @@ function(x, y, n.x, n.y, z, alternative, conf.level)
504517
CONF.INT <-
505518
switch(alternative,
506519
"two.sided" = {
520+
## FIXME: Is the conditional distribution really
521+
## symmetric about its mean?
507522
qu <- .qwilcox(alpha/2, n.x, n.y, z)
508523
ql <- n.x * n.y - qu
509524
lci <- if(qu <= min(w)) max(diffs)
510525
else min(diffs[w <= qu])
511526
uci <- if(ql >= max(w)) min(diffs)
512527
else max(diffs[w > ql])
513-
if(qu == 0) qu <- 1
514528
achieved.alpha <-
515-
2 * .pwilcox(trunc(qu) - 1, n.x, n.y, z)
529+
2 * .pwilcox(qu - 1/4, n.x, n.y, z)
516530
c(uci, lci)
517531
},
518532
"greater" = {
533+
## FIXME: Is the conditional distribution really
534+
## symmetric about its mean?
519535
qu <- .qwilcox(alpha, n.x, n.y, z)
520536
ql <- n.x * n.y - qu
521537
uci <- if(ql >= max(w)) min(diffs)
522538
else max(diffs[w > ql])
523539
if(qu == 0) qu <- 1
524540
achieved.alpha <-
525-
.pwilcox(trunc(qu) - 1, n.x, n.y, z)
541+
.pwilcox(qu - 1/4, n.x, n.y, z)
526542
c(uci, +Inf)
527543
},
528544
"less" = {
@@ -531,14 +547,17 @@ function(x, y, n.x, n.y, z, alternative, conf.level)
531547
else min(diffs[w <= qu])
532548
if(qu == 0) qu <- 1
533549
achieved.alpha <-
534-
.pwilcox(trunc(qu) - 1, n.x, n.y, z)
550+
.pwilcox(qu - 1/4, n.x, n.y, z)
535551
c(-Inf, lci)
536552
})
537553
if(achieved.alpha - alpha > alpha / 2) {
538554
warning("Requested conf.level not achievable")
539555
conf.level <- 1 - achieved.alpha
540556
}
541557
attr(CONF.INT, "conf.level") <- conf.level
558+
## FIXME: This is the Hodges-Lehmann estimate and not what is
559+
## suggested in Bauer (1972) (as in \CRANpkg{coin}: is this really
560+
## what we want?
542561
ESTIMATE <- c("difference in location" = median(diffs))
543562
list(conf.int = CONF.INT, estimate = ESTIMATE)
544563
}
@@ -661,6 +680,8 @@ function(x, y, n.x, n.y, alternative, conf.level, correct,
661680
})
662681
attr(CONF.INT, "conf.level") <- conf.level
663682
correct <- FALSE # for W(): no continuity correction for estimate
683+
## FIXME: Perhaps instead simply give the Hodges-Lehmann
684+
## estimate? In particular as we now use 'correct = FALSE'.
664685
ESTIMATE <- c("difference in location" =
665686
uniroot(W, lower=mumin, upper=mumax,
666687
tol = tol.root)$root)
@@ -721,6 +742,7 @@ function(x, m, n, z = NULL)
721742
return(dwilcox(x, m, n))
722743

723744
stopifnot(length(z) == m + n)
745+
## FIXME: why floor() and not as.integer()?
724746
if(!all(2 * z == floor(2 * z)) || any(z < 1))
725747
stop("'z' is not a rank vector")
726748

@@ -730,8 +752,10 @@ function(x, m, n, z = NULL)
730752
return(y)
731753

732754
## scores can be x.5: in that case need to multiply by f=2.
755+
## FIXME: why floor() and not as.integer()?
733756
f <- 2 - (max(z - floor(z)) == 0)
734757
d <- .Call(C_cpermdist2,
758+
## FIXME: why not sort(as.integer(f * z)) ?
735759
as.integer(sort(floor(f * z))),
736760
as.integer(m))
737761
w <- seq_along(d)
@@ -755,11 +779,14 @@ function(q, m, n, z = NULL, lower.tail = TRUE)
755779

756780
## Support of U
757781
s <- (0 : (2 * m * n)) / 2
782+
## FIXME: can we simplify to 0 : (m * n) if z is all integer?
758783
## Density
759784
d <- .dwilcox(s, m, n, z)
760785
y[i] <- vapply(q[i],
761-
function(e)
762-
sum(d[s < e + sqrt(.Machine$double.eps)]),
786+
function(e) {
787+
## FIXME: maybe use a smaller fuzz?
788+
sum(d[s < e + sqrt(.Machine$double.eps)])
789+
},
763790
0)
764791
if(lower.tail) y else 1 - y
765792
}
@@ -778,6 +805,7 @@ function(p, m, n, z = NULL, lower.tail = TRUE)
778805
return(y)
779806

780807
s <- (0 : (2 * m * n)) / 2
808+
## FIXME: can we simplify to 0 : (m * n) if z is all integer?
781809
v <- .pwilcox(s, m, n, z)
782810
if(!lower.tail)
783811
p <- 1 - p
@@ -794,14 +822,17 @@ function(x, n, z = NULL)
794822
return(dsignrank(x, n))
795823

796824
stopifnot(length(z) == n)
825+
## FIXME: why floor() and not as.integer()?
797826
if (!all(2 * z == floor(2 * z)) || any(z < 1))
798827
stop("'z' is not a rank vector")
799828
y <- rep.int(NA_real_, length(x))
800829
i <- which(!is.na(x))
801830
if (!any(i))
802831
return(y)
832+
## FIXME: why floor() and not as.integer()?
803833
f <- 2 - (max(z - floor(z)) == 0)
804834
d <- .Call(C_cpermdist1,
835+
## FIXME: why not sort(as.integer(f * z)) ?
805836
as.integer(sort(floor(f * z))))
806837
w <- seq.int(0, length(d) - 1L)
807838
x <- f * x[i]
@@ -813,8 +844,11 @@ function(x, n, z = NULL)
813844
.psignrank <-
814845
function(q, n, z = NULL, lower.tail = TRUE)
815846
{
816-
if(is.null(z))
817-
return(psignrank(q, n, lower.tail = lower.tail))
847+
if(is.null(z)) {
848+
## FIXME: currently
849+
## psignrank(2.5, 2) != psignrank(2, 2) ?
850+
return(psignrank(trunc(q), n, lower.tail = lower.tail))
851+
}
818852

819853
y <- rep.int(NA_real_, length(q))
820854
i <- which(!is.na(q))
@@ -823,11 +857,15 @@ function(q, n, z = NULL, lower.tail = TRUE)
823857

824858
## Support of V
825859
s <- seq.int(0, n * (n + 1)) / 2
860+
## FIXME: can we simplify to seq.int(0, n * (n + 1) / 2) is z is all
861+
## integer?
826862
## Density
827863
d <- .dsignrank(s, n, z)
828864
y[i] <- vapply(q[i],
829-
function(e)
830-
sum(d[s < e + sqrt(.Machine$double.eps)]),
865+
function(e) {
866+
## FIXME: maybe use a smaller fuzz?
867+
sum(d[s < e + sqrt(.Machine$double.eps)])
868+
},
831869
0)
832870
if(lower.tail) y else 1 - y
833871
}
@@ -845,6 +883,8 @@ function(p, n, z = NULL, lower.tail = TRUE)
845883
return(y)
846884

847885
s <- seq.int(0, n * (n + 1)) / 2
886+
## FIXME: can we simplify to seq.int(0, n * (n + 1) / 2) is z is all
887+
## integer?
848888
v <- .psignrank(s, n, z)
849889
if (!lower.tail)
850890
p <- 1 - p

0 commit comments

Comments
 (0)