Skip to content
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 71 additions & 36 deletions src/sshfs_mount/sshfs_mount_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
// Check if snap support is installed in the instance
if (session.exec("which snap").exit_code() != 0)
{
mpl::log(mpl::Level::warning, category, fmt::format("Snap support is not installed in '{}'", name));
mpl::warn(category, "Snap support is not installed in '{}'", name);
throw std::runtime_error(
fmt::format("Snap support needs to be installed in '{}' in order to support mounts.\n"
"Please see https://docs.snapcraft.io/installing-snapd for information on\n"
Expand All @@ -70,15 +70,14 @@
// Check if multipass-sshfs is already installed
if (session.exec("sudo snap list multipass-sshfs").exit_code(std::chrono::seconds(15)) == 0)
{
mpl::log(mpl::Level::debug, category,
fmt::format("The multipass-sshfs snap is already installed on '{}'", name));
mpl::debug(category, "The multipass-sshfs snap is already installed on '{}'", name);
return true;
}

// Check if /snap exists for "classic" snap support
if (session.exec("[ -e /snap ]").exit_code() != 0)
{
mpl::log(mpl::Level::warning, category, fmt::format("Classic snap support symlink is needed in '{}'", name));
mpl::warn(category, "Classic snap support symlink is needed in '{}'", name);
throw std::runtime_error(
fmt::format("Classic snap support is not enabled for '{}'!\n\n"
"Please see https://docs.snapcraft.io/installing-snapd for information on\n"
Expand All @@ -92,22 +91,18 @@
void install_sshfs_for(const std::string& name, mp::SSHSession& session, const std::chrono::milliseconds& timeout)
try
{
mpl::log(mpl::Level::info, category, fmt::format("Installing the multipass-sshfs snap in '{}'", name));
mpl::info(category, "Installing the multipass-sshfs snap in '{}'", name);
auto proc = session.exec("sudo snap install multipass-sshfs");
if (proc.exit_code(timeout) != 0)
{
auto error_msg = proc.read_std_error();
mpl::log(mpl::Level::error,
category,
fmt::format("Failed to install 'multipass-sshfs': {}", mpu::trim_end(error_msg)));
mpl::error(category, "Failed to install 'multipass-sshfs': {}", mpu::trim_end(error_msg));
throw mp::SSHFSMissingError();
}
}
catch (const mp::ExitlessSSHProcessException& e)
{
mpl::log(mpl::Level::error,
category,
fmt::format("Could not install 'multipass-sshfs' in '{}': {}", name, e.what()));
mpl::error(category, "Could not install 'multipass-sshfs' in '{}': {}", name, e.what());
throw mp::SSHFSMissingError();
}
} // namespace
Expand All @@ -130,10 +125,7 @@
this->mount_spec.get_gid_mappings(),
this->mount_spec.get_uid_mappings()}
{
mpl::log(
mpl::Level::info,
category,
fmt::format("initializing mount {} => {} in '{}'", this->mount_spec.get_source_path(), target, vm->vm_name));
mpl::info(category, "initializing mount {} => {} in '{}'", this->mount_spec.get_source_path(), target, vm->vm_name);
}

bool SSHFSMountHandler::is_active()
Expand All @@ -151,8 +143,7 @@
}
catch (const std::exception& e)
{
mpl::log(mpl::Level::warning, category,
fmt::format("Failed checking SSHFS mount \"{}\" in instance '{}': {}", target, vm->vm_name, e.what()));
mpl::warn(category, "Failed checking SSHFS mount \"{}\" in instance '{}': {}", target, vm->vm_name, e.what());

Check warning on line 146 in src/sshfs_mount/sshfs_mount_handler.cpp

View check run for this annotation

Codecov / codecov/patch

src/sshfs_mount/sshfs_mount_handler.cpp#L146

Added line #L146 was not covered by tests
return false;
}

Expand Down Expand Up @@ -184,25 +175,29 @@
QObject::connect(process.get(), &Process::finished, [this](const ProcessState& exit_state) {
if (exit_state.completed_successfully())
{
mpl::log(mpl::Level::info, category,
fmt::format("Mount \"{}\" in instance '{}' has stopped", target, vm->vm_name));
mpl::info(category, "Mount \"{}\" in instance '{}' has stopped", target, vm->vm_name);

Check warning on line 178 in src/sshfs_mount/sshfs_mount_handler.cpp

View check run for this annotation

Codecov / codecov/patch

src/sshfs_mount/sshfs_mount_handler.cpp#L178

Added line #L178 was not covered by tests
}
else
{
// not error as it failing can indicate we need to install sshfs in the VM
mpl::log(mpl::Level::warning, category,
fmt::format("Mount \"{}\" in instance '{}' has stopped unsuccessfully: {}", target, vm->vm_name,
exit_state.failure_message()));
mpl::warn(category,
"Mount \"{}\" in instance '{}' has stopped unsuccessfully: {}",
target,
vm->vm_name,
exit_state.failure_message());
}
});
QObject::connect(process.get(), &Process::error_occurred, [this](auto error, auto error_string) {
mpl::log(mpl::Level::error, category,
fmt::format("There was an error with sshfs_server for instance '{}' with path \"{}\": {} - {}",
vm->vm_name, target, mpu::qenum_to_string(error), error_string));
mpl::error(category,

Check warning on line 191 in src/sshfs_mount/sshfs_mount_handler.cpp

View check run for this annotation

Codecov / codecov/patch

src/sshfs_mount/sshfs_mount_handler.cpp#L191

Added line #L191 was not covered by tests
"There was an error with sshfs_server for instance '{}' with path \"{}\": {} - {}",
vm->vm_name,
target,

Check warning on line 194 in src/sshfs_mount/sshfs_mount_handler.cpp

View check run for this annotation

Codecov / codecov/patch

src/sshfs_mount/sshfs_mount_handler.cpp#L193-L194

Added lines #L193 - L194 were not covered by tests
mpu::qenum_to_string(error),
error_string);
});

mpl::log(mpl::Level::info, category, fmt::format("process program '{}'", process->program()));
mpl::log(mpl::Level::info, category, fmt::format("process arguments '{}'", process->arguments().join(", ")));
mpl::info(category, "process program '{}'", process->program());
mpl::info(category, "process arguments '{}'", process->arguments().join(", "));

start_and_block_until_connected(process.get());
// after the process is started, it must be moved to the main thread
Expand All @@ -223,23 +218,63 @@

void SSHFSMountHandler::deactivate_impl(bool force)
{
    // Use the variadic logging form for consistency with the other call sites; passing a
    // pre-formatted string as the format argument would re-interpret any braces in it.
    mpl::info(category, "Stopping mount \"{}\" in instance '{}'", target, vm->vm_name);

    // We are stopping the process deliberately; silence the error handler first so the
    // expected termination is not reported as an sshfs_server failure.
    QObject::disconnect(process.get(), &Process::error_occurred, nullptr, nullptr);

    try
    {
        // Ask the server to exit gracefully and give it 5 seconds to comply.
        if (process->terminate(); !process->wait_for_finished(5000))
        {
            auto err = fmt::format("Failed to terminate SSHFS mount process: {}", process->read_all_standard_error());
            if (force)
            {
                mpl::warn(category,
                          "Failed to gracefully stop mount \"{}\" in instance '{}': {}",
                          target,
                          vm->vm_name,
                          err);
                // The process object is held behind a delete-later pointer, so the reset()
                // below does not destroy (and thus kill) it immediately -- when forcing
                // deactivation, make sure the process actually dies now.
                process->kill();
            }
            else
                throw std::runtime_error{err};
        }
    }
    catch (...)
    {
        // Give up waiting for the `finished` signal before the process is destroyed.
        QObject::disconnect(process.get(), &Process::finished, nullptr, nullptr);
        throw;
    }
    process.reset();
}

SSHFSMountHandler::~SSHFSMountHandler()
{
    deactivate(/*force=*/true);

    /*
     * Ensure that the signals are disconnected.
     *
     * `is_active()` can report false (e.g. when the process has already died), which
     * prevents `deactivate_impl` from being run. In that case the disconnect for
     * `error_occurred` never happens, and the signal could still fire while the mount
     * handler and process objects are no longer alive.
     *
     * We disconnect the signals here because it must happen as part of the destructor
     * regardless of which deactivation path was taken.
     *
     * FIXME: Find a way to associate slot lifetime with the process itself.
     */
    if (process)
    {
        constexpr std::string_view warn_msg_fmtstr =
            "Stopped listening to sshfs_server process only upon SSHFSMountHandler destruction";
        // disconnect() returns true only when it actually severed a connection; it is a
        // no-op (returning false) when nothing is connected, so we warn only if a slot
        // survived this long.
        if (QObject::disconnect(process.get(), &Process::finished, nullptr, nullptr))
        {
            mpl::warn(category, warn_msg_fmtstr);
        }
        if (QObject::disconnect(process.get(), &Process::error_occurred, nullptr, nullptr))
        {
            mpl::warn(category, warn_msg_fmtstr);
        }
    }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is already the case, technically. The connection is broken automatically when the process is destroyed. But that is not aligned with the destruction of the mount handler, since the process is behind a qt_delete_later_unique_ptr. So the mount handler is destroyed, it calls Qt's deleteLater on the process object, and after control goes back to the Qt event loop, the process object is destroyed, which kills the process, which emits the finished signal, whose slot references this mount handler, and that is what leads to the crash, IIUC. So what we need to do is tie the lifetime of those connections to the lifetime of this mount handler, since the slots reference the mount handlers. And this is basically what you are doing here with the manual disconnects. But that makes it so when the process is actually killed later, we won't get those log messages (maybe we don't even need them in this case, so that's fine). One other avenue worth exploring is to move to a regular unique pointer instead of using deleteLater. That way, the process object would be destroyed (so disconnected and killed too) together with the mount handler. Unfortunately I do not remember why we're using the deleteLater option in this case, so aother option would be to see if the issue still occurs if the slots do not reference the mount handler (i.e. no [this] in the slot lambdas by pre-computing the part of the log message that access the mount handler data), and that way we won't need to manually disconnect those things, and the slots should be able to run when Qt actually deletes the process.

Copy link
Member Author

@xmkg xmkg Mar 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Everything makes more sense now @andrei-toterman, thanks for the explanation!

One other avenue worth exploring is to move to a regular unique pointer instead of using deleteLater

Yeah, that seems a good option, but I wonder about the same question you've asked: "why we opted to use the qt_delete_later_unique_ptr in the first place?"

But that makes it so when the process is actually killed later, we won't get those log messages (maybe we don't even need them in this case, so that's fine)

The log messages are just here to indicate that an abnormal thing has just happened. This kind of detachment shouldn't happen normally.
EDIT: I realized that you're referring to the signal handler log messages. Good point.

After reading both of your comments I think we're better off using std::unique_ptr<> here because it would also solve the kill() problem:

If we were using a regular unique pointer, the .reset() below would kill the process immediately.

I'll dig into what's the reason behind using the delete_later thingy. It might be a legacy QT GUI requirement, and if that's the case we can ditch it for good.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So it seems like the QT objects' destruction should happen in the event loop to ensure signal-slot safety for the QT side of things. We can use shared_from_this(), or a weak_ptr to track the liveliness of the mount handler, but that would mean the mount handler object has to be a shared_ptr. That might make things more complicated than they need to be.

Back to the drawing board. A good part of the problem comes from the "is_active()" implementation. The base class uses the is_active function to check if the mount handler has called the activate_impl, and succeeded. But SSHFSMountHandler's is_active implementation does something else entirely:

a-) checks whether the process object is alive
b-) checks whether the process is still running
c-) checks whether the mount is present in the guest or not

The current implementation looks more like a "health check" function than an is_active function. One of them being not true is enough to prevent deactivate_impl from running, which contradicts the design of the MountHandler. In reality, the handler can be "active" while being in an error state, or being dysfunctional. As far as my understanding goes, every activate() must be eventually finalized by a deactivate(). Hence, I think we're better off without the SSHFSMountHandler::is_active() override. We can introduce an is_healthy() function if that's something needed, but I didn't see any use of is_active() other than the base class activate() and deactivate() functions.

Let me know what you think @ricab @andrei-toterman.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, that makes much more sense to me too!

Copy link
Member Author

@xmkg xmkg Mar 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, I've simplified things quite a lot:

  • Moved the disconnect() to the QtDeleteLater custom deleter itself:
    Rationale: In all cases, the owner scope of the qt_delete_later_unique_ptr is not a part of the QT object hierarchy, and the signal handlers are also part of the owner scope, which means signal handlers' lifetime is bound to the owner scope. Therefore it's better to ensure that all signals are disconnected before deferring deletion to the QT event loop.
  • Added process->kill() to the deactivate_impl's force{} block.
  • Added a comment explaining a potential gotcha for the one and only other user of the qt_delete_later_unique_ptr, NewReleaseMonitor.

@ricab @andrei-toterman let me know what you think of the new approach.

}
} // namespace multipass
Loading