Skip to content

Commit 4b1815a

Browse files
niklas88hcahca
authored andcommitted
s390/pci: Allow re-add of a reserved but not yet removed device
The architecture assumes that PCI functions can be removed synchronously as PCI events are processed. This however clashes with the reference counting of struct pci_dev which allows device drivers to hold on to a struct pci_dev reference even as the underlying device is removed. To bridge this gap commit 2a671f7 ("s390/pci: fix use after free of zpci_dev") keeps the struct zpci_dev in ZPCI_FN_STATE_RESERVED state until common code releases the struct pci_dev. Only when all references are dropped, the struct zpci_dev can be removed and freed. Later commit a46044a ("s390/pci: fix zpci_zdev_put() on reserve") moved the deletion of the struct zpci_dev from the zpci_list in zpci_release_device() to the point where the device is reserved. This was done to prevent handling events for a device that is already being removed, e.g. when the platform generates both PCI event codes 0x304 and 0x308. In retrospect, deletion from the zpci_list in the release function without holding the zpci_list_lock was also racy. A side effect of this handling is that if the underlying device re-appears while the struct zpci_dev is in the ZPCI_FN_STATE_RESERVED state, the new and old instances of the struct zpci_dev and/or struct pci_dev may clash. For example when trying to create the IOMMU sysfs files for the new instance. In this case, re-adding the new instance is aborted. The old instance is removed, and the device will remain absent until the platform issues another event. Fix this by allowing the struct zpci_dev to be brought back up right until it is finally removed. To this end also keep the struct zpci_dev in the zpci_list until it is finally released when all references have been dropped. Deletion from the zpci_list from within the release function is made safe by using kref_put_lock() with the zpci_list_lock. This ensures that the releasing code holds the last reference. Cc: [email protected] Fixes: a46044a ("s390/pci: fix zpci_zdev_put() on reserve") Reviewed-by: Gerd Bayer <[email protected]> Tested-by: Gerd Bayer <[email protected]> Signed-off-by: Niklas Schnelle <[email protected]> Signed-off-by: Heiko Carstens <[email protected]>
1 parent 47c3978 commit 4b1815a

File tree

3 files changed

+45
-16
lines changed

3 files changed

+45
-16
lines changed

arch/s390/pci/pci.c

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,13 @@ EXPORT_SYMBOL_GPL(zpci_aipb);
7070
struct airq_iv *zpci_aif_sbv;
7171
EXPORT_SYMBOL_GPL(zpci_aif_sbv);
7272

73+
void zpci_zdev_put(struct zpci_dev *zdev)
74+
{
75+
if (!zdev)
76+
return;
77+
kref_put_lock(&zdev->kref, zpci_release_device, &zpci_list_lock);
78+
}
79+
7380
struct zpci_dev *get_zdev_by_fid(u32 fid)
7481
{
7582
struct zpci_dev *tmp, *zdev = NULL;
@@ -925,21 +932,20 @@ int zpci_deconfigure_device(struct zpci_dev *zdev)
925932
* @zdev: the zpci_dev that was reserved
926933
*
927934
* Handle the case that a given zPCI function was reserved by another system.
928-
* After a call to this function the zpci_dev can not be found via
929-
* get_zdev_by_fid() anymore but may still be accessible via existing
930-
* references though it will not be functional anymore.
931935
*/
932936
void zpci_device_reserved(struct zpci_dev *zdev)
933937
{
934-
/*
935-
* Remove device from zpci_list as it is going away. This also
936-
* makes sure we ignore subsequent zPCI events for this device.
937-
*/
938-
spin_lock(&zpci_list_lock);
939-
list_del(&zdev->entry);
940-
spin_unlock(&zpci_list_lock);
938+
lockdep_assert_held(&zdev->state_lock);
939+
/* We may declare the device reserved multiple times */
940+
if (zdev->state == ZPCI_FN_STATE_RESERVED)
941+
return;
941942
zdev->state = ZPCI_FN_STATE_RESERVED;
942943
zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
944+
/*
945+
* The underlying device is gone. Allow the zdev to be freed
946+
* as soon as all other references are gone by accounting for
947+
* the removal as a dropped reference.
948+
*/
943949
zpci_zdev_put(zdev);
944950
}
945951

@@ -948,6 +954,12 @@ void zpci_release_device(struct kref *kref)
948954
struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
949955

950956
WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED);
957+
/*
958+
* We already hold zpci_list_lock thanks to kref_put_lock().
959+
* This makes sure no new reference can be taken from the list.
960+
*/
961+
list_del(&zdev->entry);
962+
spin_unlock(&zpci_list_lock);
951963

952964
if (zdev->has_hp_slot)
953965
zpci_exit_slot(zdev);

arch/s390/pci/pci_bus.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,8 @@ int zpci_bus_scan_device(struct zpci_dev *zdev);
2121
void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);
2222

2323
void zpci_release_device(struct kref *kref);
24-
static inline void zpci_zdev_put(struct zpci_dev *zdev)
25-
{
26-
if (zdev)
27-
kref_put(&zdev->kref, zpci_release_device);
28-
}
24+
25+
void zpci_zdev_put(struct zpci_dev *zdev);
2926

3027
static inline void zpci_zdev_get(struct zpci_dev *zdev)
3128
{

arch/s390/pci/pci_event.c

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,22 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
335335
zdev->state = ZPCI_FN_STATE_STANDBY;
336336
}
337337

338+
static void zpci_event_reappear(struct zpci_dev *zdev)
339+
{
340+
lockdep_assert_held(&zdev->state_lock);
341+
/*
342+
* The zdev is in the reserved state. This means that it was presumed to
343+
* go away but there are still undropped references. Now, the platform
344+
* announced its availability again. Bring back the lingering zdev
345+
* to standby. This is safe because we hold a temporary reference
346+
* now so that it won't go away. Account for the re-appearance of the
347+
* underlying device by incrementing the reference count.
348+
*/
349+
zdev->state = ZPCI_FN_STATE_STANDBY;
350+
zpci_zdev_get(zdev);
351+
zpci_dbg(1, "rea fid:%x, fh:%x\n", zdev->fid, zdev->fh);
352+
}
353+
338354
static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
339355
{
340356
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
@@ -358,8 +374,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
358374
break;
359375
}
360376
} else {
377+
if (zdev->state == ZPCI_FN_STATE_RESERVED)
378+
zpci_event_reappear(zdev);
361379
/* the configuration request may be stale */
362-
if (zdev->state != ZPCI_FN_STATE_STANDBY)
380+
else if (zdev->state != ZPCI_FN_STATE_STANDBY)
363381
break;
364382
zdev->state = ZPCI_FN_STATE_CONFIGURED;
365383
}
@@ -375,6 +393,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
375393
break;
376394
}
377395
} else {
396+
if (zdev->state == ZPCI_FN_STATE_RESERVED)
397+
zpci_event_reappear(zdev);
378398
zpci_update_fh(zdev, ccdf->fh);
379399
}
380400
break;

0 commit comments

Comments
 (0)