@@ -399,6 +399,29 @@ def __init__(
399
399
)
400
400
401
401
402
+ def _directional_distance (equations , points ):
403
+ """
404
+ Computes the distance of the points to the planes defined by the equations
405
+ with respect to the direction of the first dimension.
406
+
407
+ equations : ndarray of shape (n_facets, n_dim)
408
+ each row contains the coefficienst for the plane equation of the form
409
+ equations[i, 0]*x_1 + ...
410
+ + equations[i, -2]*x_{n_dim} = equations[i, -1]
411
+ -equations[i, -1] is the offset
412
+ points : ndarray of shape (n_samples, n_dim)
413
+ points to compute the directional distance from
414
+
415
+ Returns
416
+ -------
417
+ directional_distance : ndarray of shape (nsamples, nequations)
418
+ closest distance wrt. the first dimension of the point to the planes
419
+ defined by the equations
420
+ """
421
+ orthogonal_distances = - (points @ equations [:, :- 1 ].T ) - equations [:, - 1 :].T
422
+ return - orthogonal_distances / equations [:, :1 ].T
423
+
424
+
402
425
class DirectionalConvexHull :
403
426
"""
404
427
Performs Sample Selection by constructing a Directional Convex Hull and determining the distance to the hull as outlined in the reference
@@ -411,6 +434,14 @@ class DirectionalConvexHull:
411
434
directional convex hull construction (also known as the low-
412
435
dimensional (LD) hull). By default [0] is used.
413
436
437
+ tolerance : float, default=1.0E-12
438
+ Tolerance for the negative distances to the directional
439
+ convex hull to consider a point below the convex hull. Depending
440
+ if a point is below or above the convex hull the distance is
441
+ differently computed. A very low value can result in a completely
442
+ wrong distances. Distances cannot be distinguished from zero up
443
+ to tolerance. It is recommended to leave the default setting.
444
+
414
445
Attributes
415
446
----------
416
447
@@ -426,22 +457,20 @@ class DirectionalConvexHull:
426
457
Interpolater for the features in the high-
427
458
dimensional space
428
459
429
- interpolator_y_ : scipy.interpolate.interpnd.LinearNDInterpolator
430
- Interpolater for the targets y
431
-
432
460
References
433
461
----------
434
462
.. [dch] A. Anelli, E. A. Engel, C. J. Pickard and M. Ceriotti,
435
463
Physical Review Materials, 2018.
436
464
"""
437
465
438
- def __init__ (self , low_dim_idx = None ):
466
+ def __init__ (self , low_dim_idx = None , tolerance = 1e-12 ):
439
467
self .low_dim_idx = low_dim_idx
440
468
441
469
if low_dim_idx is None :
442
470
self .low_dim_idx = [0 ]
443
471
else :
444
472
self .low_dim_idx = low_dim_idx
473
+ self .tolerance = tolerance
445
474
446
475
def fit (self , X , y ):
447
476
"""
@@ -490,39 +519,43 @@ def fit(self, X, y):
490
519
# create high-dimensional feature matrix
491
520
high_dim_feats = X [:, self .high_dim_idx_ ]
492
521
# get scipy convex hull
493
- convex_hull = ConvexHull (convex_hull_data )
522
+ self . convex_hull_ = ConvexHull (convex_hull_data , incremental = True )
494
523
# get normal equations to the hull simplices
495
- y_normal = convex_hull .equations [:, 0 ]
524
+ y_normal = self . convex_hull_ .equations [:, 0 ]
496
525
497
526
# get vertices_idx of the convex hull
498
- self .selected_idx_ = np .unique (
499
- convex_hull .simplices [np .where (y_normal < 0 )[0 ]].flatten ()
500
- )
527
+ directional_facets_idx = np .where (y_normal < 0 )[0 ]
528
+ self .directional_simplices_ = self .convex_hull_ .simplices [
529
+ directional_facets_idx
530
+ ]
531
+ self ._directional_equations_ = self .convex_hull_ .equations [
532
+ directional_facets_idx
533
+ ]
534
+
535
+ self .selected_idx_ = np .unique (self .directional_simplices_ .flatten ())
536
+ self ._directional_points_ = convex_hull_data [self .selected_idx_ ]
501
537
502
538
# required for the score_feature_matrix function
503
539
self .interpolator_high_dim_ = _linear_interpolator (
504
540
points = convex_hull_data [self .selected_idx_ , 1 :],
505
541
values = high_dim_feats [self .selected_idx_ ],
506
542
)
507
543
508
- # required to compute the distance of the low-dimensional feature to the convex hull
509
- self .interpolator_y_ = _linear_interpolator (
510
- points = convex_hull_data [self .selected_idx_ , 1 :],
511
- values = convex_hull_data [self .selected_idx_ , 0 ],
512
- )
513
-
514
544
return self
515
545
546
+ @property
547
+ def directional_vertices_ (self ):
548
+ return self .selected_idx_
549
+
516
550
def _check_X_y (self , X , y ):
517
- return check_X_y (X , y , ensure_min_features = 2 , multi_output = False )
551
+ return check_X_y (X , y , ensure_min_features = 1 , multi_output = False )
518
552
519
553
def _check_is_fitted (self , X ):
520
554
check_is_fitted (
521
555
self ,
522
556
[
523
557
"high_dim_idx_" ,
524
558
"interpolator_high_dim_" ,
525
- "interpolator_y_" ,
526
559
"selected_idx_" ,
527
560
],
528
561
)
@@ -556,7 +589,7 @@ def score_samples(self, X, y):
556
589
557
590
Returns
558
591
-------
559
- dch_distance : numpy.array of shape (n_samples, len(high_dim_idx_))
592
+ dch_distance : ndarray of shape (n_samples, len(high_dim_idx_))
560
593
The distance (residuals) of samples to the convex hull in
561
594
the higher-dimensional space.
562
595
"""
@@ -565,17 +598,46 @@ def score_samples(self, X, y):
565
598
566
599
# features used for the convex hull construction
567
600
low_dim_feats = X [:, self .low_dim_idx ]
601
+ hull_space_data = np .hstack ((y .reshape (- 1 , 1 ), low_dim_feats ))
568
602
569
603
# the X points projected on the convex surface
570
- interpolated_y = self .interpolator_y_ ( low_dim_feats ).reshape (y .shape )
604
+ return self ._directional_convex_hull_distance ( hull_space_data ).reshape (y .shape )
571
605
572
- if np .any (np .isnan (interpolated_y )):
573
- warnings .warn (
574
- "There are samples in X with a low-dimensional part that is outside of the range of the convex surface. Distance will contain nans." ,
575
- UserWarning ,
576
- )
606
+ def _directional_convex_hull_distance (self , points ):
607
+ """
608
+ Get the distance to the fitted directional convex hull in the target dimension y.
609
+
610
+ If a point lies above the convex hull, it will have a positive distance.
611
+ If a point lies below the convex hull, it will have a negative distance - this should only
612
+ occur if you pass a point that the convex hull has not been fitted on in the `fit` function.
613
+ If a point lies on the convex hull, it will have a distance of zero.
577
614
578
- return y - interpolated_y
615
+ Parameters
616
+ ----------
617
+ points : The points for which you would like to calculate the distance to the
618
+ fitted directional convex hull.
619
+ """
620
+ # directional distances to all plane equations
621
+ all_directional_distances = _directional_distance (
622
+ self ._directional_equations_ , points
623
+ )
624
+ print (all_directional_distances )
625
+ # we get negative distances for each plane to check if any distance is below the threshold
626
+ below_directional_convex_hull = np .any (
627
+ all_directional_distances < - self .tolerance , axis = 1
628
+ )
629
+ # directional distances to corresponding plane equation
630
+ directional_distances = np .zeros (len (points ))
631
+ directional_distances [~ below_directional_convex_hull ] = np .min (
632
+ all_directional_distances [~ below_directional_convex_hull ], axis = 1
633
+ )
634
+ # some distances can be positive, so we take the max of all negative distances
635
+ negative_directional_distances = all_directional_distances .copy ()
636
+ negative_directional_distances [all_directional_distances > 0 ] = - np .inf
637
+ directional_distances [below_directional_convex_hull ] = np .max (
638
+ negative_directional_distances [below_directional_convex_hull ], axis = 1
639
+ )
640
+ return directional_distances
579
641
580
642
def score_feature_matrix (self , X ):
581
643
"""
0 commit comments