@@ -114,25 +114,24 @@ function(x, y = NULL, alternative = c("two.sided", "less", "greater"),
114
114
n.y <- as.double(length(y ))
115
115
if (is.null(exact ))
116
116
exact <- (n.x < 50 ) && (n.y < 50 )
117
- STAT <- .wilcox_test_two_stat(x , y , mu , n.x , n.y , digits.rank )
118
- TIES <- STAT $ ties
119
- if (exact && ! TIES ) {
117
+ if (exact ) {
120
118
METHOD <- sub(" test" , " exact test" , METHOD , fixed = TRUE )
121
- PVAL <- .wilcox_test_two_pval_exact(STAT $ statistic ,
122
- n.x , n.y ,
119
+ STAT <- .wilcox_test_two_stat_exact(x , y , mu , n.x , n.y ,
120
+ digits.rank )
121
+ PVAL <- .wilcox_test_two_pval_exact(STAT , n.x , n.y ,
123
122
alternative )
124
123
if (conf.int )
125
124
CINT <- .wilcox_test_two_cint_exact(x , y , n.x , n.y ,
125
+ STAT $ z ,
126
126
alternative ,
127
127
conf.level )
128
128
}
129
- else { # # not exact, maybe ties or zeroes
129
+ else { # # not exact
130
130
if (correct )
131
131
METHOD <- paste(METHOD , " with continuity correction" )
132
- PVAL <- .wilcox_test_two_pval_asymp(STAT $ statistic ,
133
- STAT $ mean ,
134
- STAT $ sd ,
135
- alternative ,
132
+ STAT <- .wilcox_test_two_stat_asymp(x , y , mu , n.x , n.y ,
133
+ digits.rank )
134
+ PVAL <- .wilcox_test_two_pval_asymp(STAT , alternative ,
136
135
correct )
137
136
if (conf.int )
138
137
CINT <- .wilcox_test_two_cint_asymp(x , y , n.x , n.y ,
@@ -141,11 +140,6 @@ function(x, y = NULL, alternative = c("two.sided", "less", "greater"),
141
140
correct ,
142
141
tol.root ,
143
142
digits.rank )
144
- if (exact && TIES ) {
145
- warning(" cannot compute exact p-value with ties" )
146
- if (conf.int )
147
- warning(" cannot compute exact confidence intervals with ties" )
148
- }
149
143
}
150
144
}
151
145
@@ -378,8 +372,18 @@ function(x, n, alternative, conf.level, correct,
378
372
} # regular (Wmumin, Wmumax)
379
373
list (conf.int = CONF.INT , estimate = ESTIMATE )
380
374
}
375
+
376
+ .wilcox_test_two_stat_exact <-
377
+ function (x , y , mu , n.x = length(x ), n.y = length(y ), digits.rank )
378
+ {
379
+ r <- c(x - mu , y )
380
+ r <- rank(if (is.finite(digits.rank )) signif(r , digits.rank ) else r )
381
+ TIES <- (length(r ) != length(unique(r )))
382
+ STATISTIC <- c(" W" = sum(r [seq_along(x )]) - n.x * (n.x + 1 ) / 2 )
383
+ list (statistic = STATISTIC , z = if (TIES ) r else NULL )
384
+ }
381
385
382
- .wilcox_test_two_stat <-
386
+ .wilcox_test_two_stat_asymp <-
383
387
function (x , y , mu , n.x = length(x ), n.y = length(y ), digits.rank )
384
388
{
385
389
r <- c(x - mu , y )
@@ -392,78 +396,97 @@ function(x, y, mu, n.x = length(x), n.y = length(y), digits.rank)
392
396
((n.x + n.y + 1 )
393
397
- sum(NTIES ^ 3 - NTIES )
394
398
/ ((n.x + n.y ) * (n.x + n.y - 1 ))))
395
- list (statistic = STATISTIC , mean = MEAN , sd = SIGMA , ties = TIES )
399
+ list (statistic = STATISTIC , ex = MEAN , sd = SIGMA , ties = TIES )
396
400
}
397
401
398
402
.wilcox_test_two_pval_exact <-
399
- function (STATISTIC , n.x , n.y , alternative )
403
+ function (STAT , n.x , n.y , alternative )
400
404
{
405
+ u <- STAT $ statistic
406
+ z <- STAT $ z
401
407
switch (alternative ,
402
408
" two.sided" = {
403
- p <- if (STATISTIC > (n.x * n.y / 2 ))
404
- pwilcox(STATISTIC - 1 , n.x , n.y , lower.tail = FALSE )
409
+ p <- if (u > (n.x * n.y / 2 ))
410
+ . pwilcox(u - 1 , n.x , n.y , z , lower.tail = FALSE )
405
411
else
406
- pwilcox(STATISTIC , n.x , n.y )
412
+ . pwilcox(u , n.x , n.y , z )
407
413
min(2 * p , 1 )
408
414
},
409
415
" greater" = {
410
- pwilcox(STATISTIC - 1 , n.x , n.y , lower.tail = FALSE )
416
+ . pwilcox(u - 1 , n.x , n.y , z , lower.tail = FALSE )
411
417
},
412
- " less" = pwilcox(STATISTIC , n.x , n.y ))
418
+ " less" = . pwilcox(u , n.x , n.y , z ))
413
419
}
414
420
415
421
.wilcox_test_two_cint_exact <-
416
- function (x , y , n.x , n.y , alternative , conf.level )
422
+ function (x , y , n.x , n.y , z , alternative , conf.level )
417
423
{
418
424
# # Exact confidence interval for the location parameter
419
425
# # mean(x) - mean(y) in the two-sample case (cf. the
420
426
# # one-sample case).
421
427
alpha <- 1 - conf.level
422
428
diffs <- sort(outer(x , y , `-` ))
429
+ w <- if (is.null(z ))
430
+ (n.x * n.y ) : 1L
431
+ else {
432
+ i <- seq_along(x )
433
+ m <- n.x * (n.x + 1 ) / 2
434
+ vapply(diffs , \(d ) sum(rank(c(x - d , y ))[i ]) - m , 0 )
435
+ }
423
436
CONF.INT <-
424
437
switch (alternative ,
425
438
" two.sided" = {
426
- qu <- qwilcox(alpha / 2 , n.x , n.y )
439
+ qu <- .qwilcox(alpha / 2 , n.x , n.y , z )
440
+ ql <- n.x * n.y - qu
441
+ lci <- if (qu < = min(w )) max(diffs )
442
+ else min(diffs [w < = qu ])
443
+ uci <- if (ql > = max(w )) min(diffs )
444
+ else max(diffs [w > ql ])
427
445
if (qu == 0 ) qu <- 1
428
- ql <- n.x * n.y - qu
429
- achieved.alpha <- 2 * pwilcox(trunc(qu )- 1 , n.x ,n.y )
430
- c(diffs [ qu ], diffs [ ql + 1 ] )
446
+ achieved.alpha <-
447
+ 2 * . pwilcox(trunc(qu ) - 1 , n.x , n.y , z )
448
+ c(uci , lci )
431
449
},
432
450
" greater" = {
433
- qu <- qwilcox(alpha , n.x , n.y )
451
+ qu <- .qwilcox(alpha , n.x , n.y , z )
452
+ ql <- n.x * n.y - qu
453
+ uci <- if (ql > = max(w )) min(diffs )
454
+ else max(diffs [w > ql ])
434
455
if (qu == 0 ) qu <- 1
435
- achieved.alpha <- pwilcox(trunc(qu )- 1 ,n.x ,n.y )
436
- c(diffs [qu ], + Inf )
456
+ achieved.alpha <-
457
+ .pwilcox(trunc(qu ) - 1 , n.x , n.y , z )
458
+ c(uci , + Inf )
437
459
},
438
460
" less" = {
439
- qu <- qwilcox(alpha , n.x , n.y )
461
+ qu <- .qwilcox(alpha , n.x , n.y , z )
462
+ lci <- if (qu < = min(w )) max(diffs )
463
+ else min(diffs [w < = qu ])
440
464
if (qu == 0 ) qu <- 1
441
- ql <- n.x * n.y - qu
442
- achieved.alpha <- pwilcox(trunc(qu )- 1 , n.x ,n.y )
443
- c(- Inf , diffs [ ql + 1 ] )
465
+ achieved.alpha <-
466
+ . pwilcox(trunc(qu ) - 1 , n.x , n.y , z )
467
+ c(- Inf , lci )
444
468
})
445
- if (achieved.alpha - alpha > alpha / 2 ) {
469
+ if (achieved.alpha - alpha > alpha / 2 ) {
446
470
warning(" Requested conf.level not achievable" )
447
471
conf.level <- 1 - achieved.alpha
448
472
}
449
473
attr(CONF.INT , " conf.level" ) <- conf.level
450
474
ESTIMATE <- c(" difference in location" = median(diffs ))
451
475
list (conf.int = CONF.INT , estimate = ESTIMATE )
452
- }
476
+ }
453
477
454
-
455
478
.wilcox_test_two_pval_asymp <-
456
- function (STATISTIC , MEAN , SIGMA , alternative , correct )
479
+ function (STAT , alternative , correct )
457
480
{
458
- z <- STATISTIC - MEAN
481
+ z <- STAT $ statistic - STAT $ ex
459
482
CORRECTION <- if (correct )
460
483
switch (alternative ,
461
484
" two.sided" = sign(z ) * 0.5 ,
462
485
" greater" = 0.5 ,
463
486
" less" = - 0.5 )
464
487
else
465
488
0
466
- z <- (z - CORRECTION ) / SIGMA
489
+ z <- (z - CORRECTION ) / STAT $ sd
467
490
switch (alternative ,
468
491
" less" = pnorm(z ),
469
492
" greater" = pnorm(z , lower.tail = FALSE ),
0 commit comments