diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index 26cd02e5edcb..6457d55dff5f 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -398,9 +398,17 @@ async fn tenant_attach_handler(request: Request
) -> Result,
let state = get_state(&request);
if let Some(remote_storage) = &state.remote_storage {
- mgr::attach_tenant(state.conf, tenant_id, remote_storage.clone(), &ctx)
- .instrument(info_span!("tenant_attach", tenant = %tenant_id))
- .await?;
+ mgr::attach_tenant(
+ state.conf,
+ tenant_id,
+ // XXX: Attach should provide the config, especially during tenant migration.
+ // See https://github.com/neondatabase/neon/issues/1555
+ TenantConfOpt::default(),
+ remote_storage.clone(),
+ &ctx,
+ )
+ .instrument(info_span!("tenant_attach", tenant = %tenant_id))
+ .await?;
} else {
return Err(ApiError::BadRequest(anyhow!(
"attach_tenant is not possible because pageserver was configured without remote storage"
diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index 9ab02624074b..cccad3e4d5ea 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -602,12 +602,9 @@ impl Tenant {
remote_storage: GenericRemoteStorage,
ctx: &RequestContext,
) -> anyhow::Result> {
- // XXX: Attach should provide the config, especially during tenant migration.
- // See https://github.com/neondatabase/neon/issues/1555
- let tenant_conf = TenantConfOpt::default();
-
- Self::attach_idempotent_create_marker_file(conf, tenant_id)
- .context("create attach marker file")?;
+ // TODO dedup with spawn_load
+ let tenant_conf =
+ Self::load_tenant_config(conf, tenant_id).context("load tenant config")?;
let wal_redo_manager = Arc::new(PostgresRedoManager::new(conf, tenant_id));
let tenant = Arc::new(Tenant::new(
@@ -644,45 +641,6 @@ impl Tenant {
Ok(tenant)
}
- fn attach_idempotent_create_marker_file(
- conf: &'static PageServerConf,
- tenant_id: TenantId,
- ) -> anyhow::Result<()> {
- // Create directory with marker file to indicate attaching state.
- // The load_local_tenants() function in tenant::mgr relies on the marker file
- // to determine whether a tenant has finished attaching.
- let tenant_dir = conf.tenant_path(&tenant_id);
- let marker_file = conf.tenant_attaching_mark_file_path(&tenant_id);
- debug_assert_eq!(marker_file.parent().unwrap(), tenant_dir);
- // TODO: should use tokio::fs here, but
- // 1. caller is not async, for good reason (it holds tenants map lock)
- // 2. we'd need to think about cancel safety. Turns out dropping a tokio::fs future
- // doesn't wait for the activity in the fs thread pool.
- crashsafe::create_dir_all(&tenant_dir).context("create tenant directory")?;
- match fs::OpenOptions::new()
- .write(true)
- .create_new(true)
- .open(&marker_file)
- {
- Ok(_) => {}
- Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
- // Either this is a retry of attach or there is a concurrent task also doing attach for this tenant.
- // We cannot distinguish this here.
- // The caller is responsible for ensuring there's no concurrent attach for a tenant.
- {} // fsync again, we don't know if that already happened
- }
- err => {
- err.context("create tenant attaching marker file")?;
- unreachable!("we covered the Ok() case above");
- }
- }
- crashsafe::fsync_file_and_parent(&marker_file)
- .context("fsync tenant attaching marker file and parent")?;
- debug_assert!(tenant_dir.is_dir());
- debug_assert!(marker_file.is_file());
- Ok(())
- }
-
///
/// Background task that downloads all data for a tenant and brings it to Active state.
///
@@ -2118,6 +2076,7 @@ impl Tenant {
// enough to just fsync it always.
crashsafe::fsync(target_config_parent)?;
+ // XXX we're not fsyncing the parent dir; we need to do that in the `creating_tenant` case
Ok(())
};
@@ -2761,15 +2720,23 @@ fn remove_timeline_and_uninit_mark(timeline_dir: &Path, uninit_mark: &Path) -> a
Ok(())
}
+pub(crate) enum CreateTenantFilesMode {
+ Create,
+ Attach,
+}
+
pub(crate) fn create_tenant_files(
conf: &'static PageServerConf,
tenant_conf: TenantConfOpt,
tenant_id: TenantId,
+ mode: CreateTenantFilesMode,
) -> anyhow::Result {
let target_tenant_directory = conf.tenant_path(&tenant_id);
anyhow::ensure!(
- !target_tenant_directory.exists(),
- "cannot create new tenant repo: '{tenant_id}' directory already exists",
+ !target_tenant_directory
+ .try_exists()
+ .context("check existence of tenant directory")?,
+ "tenant directory already exists",
);
let temporary_tenant_dir =
@@ -2791,6 +2758,7 @@ pub(crate) fn create_tenant_files(
conf,
tenant_conf,
tenant_id,
+ mode,
&temporary_tenant_dir,
&target_tenant_directory,
);
@@ -2815,9 +2783,28 @@ fn try_create_target_tenant_dir(
conf: &'static PageServerConf,
tenant_conf: TenantConfOpt,
tenant_id: TenantId,
+ mode: CreateTenantFilesMode,
temporary_tenant_dir: &Path,
target_tenant_directory: &Path,
) -> Result<(), anyhow::Error> {
+ match mode {
+ CreateTenantFilesMode::Create => {} // needs no attach marker, writing tenant conf + atomic rename of dir is good enough
+ CreateTenantFilesMode::Attach => {
+ let attach_marker_path = temporary_tenant_dir.join(TENANT_ATTACHING_MARKER_FILENAME);
+ let file = std::fs::OpenOptions::new()
+ .create_new(true)
+ .write(true)
+ .open(&attach_marker_path)
+ .with_context(|| {
+ format!("could not create attach marker file {attach_marker_path:?}")
+ })?;
+ file.sync_all().with_context(|| {
+ format!("could not sync attach marker file: {attach_marker_path:?}")
+ })?;
+ // fsync of the directory in which the file resides comes later in this function
+ }
+ }
+
let temporary_tenant_timelines_dir = rebase_directory(
&conf.timelines_path(&tenant_id),
target_tenant_directory,
@@ -2844,6 +2831,11 @@ fn try_create_target_tenant_dir(
anyhow::bail!("failpoint tenant-creation-before-tmp-rename");
});
+ // Make sure the current tenant directory entries are durable before renaming.
+ // Without this, a crash may reorder any of the directory entry creations above.
+ crashsafe::fsync(temporary_tenant_dir)
+ .with_context(|| format!("sync temporary tenant directory {temporary_tenant_dir:?}"))?;
+
fs::rename(temporary_tenant_dir, target_tenant_directory).with_context(|| {
format!(
"move tenant {} temporary directory {} into the permanent one {}",
diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs
index 8191880bb562..1542d34a6632 100644
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -19,7 +19,7 @@ use crate::config::PageServerConf;
use crate::context::{DownloadBehavior, RequestContext};
use crate::task_mgr::{self, TaskKind};
use crate::tenant::config::TenantConfOpt;
-use crate::tenant::{Tenant, TenantState};
+use crate::tenant::{create_tenant_files, CreateTenantFilesMode, Tenant, TenantState};
use crate::IGNORED_TENANT_FILE_NAME;
use utils::fs_ext::PathExt;
@@ -282,9 +282,15 @@ pub async fn create_tenant(
// We're holding the tenants lock in write mode while doing local IO.
// If this section ever becomes contentious, introduce a new `TenantState::Creating`
// and do the work in that state.
- let tenant_directory = super::create_tenant_files(conf, tenant_conf, tenant_id)?;
+ let tenant_directory = super::create_tenant_files(conf, tenant_conf, tenant_id, CreateTenantFilesMode::Create)?;
+ // TODO: tenant directory remains on disk if we bail out from here on.
+ // See https://github.com/neondatabase/neon/issues/4233
+
let created_tenant =
schedule_local_tenant_processing(conf, &tenant_directory, remote_storage, ctx)?;
+ // TODO: tenant object & its background loops remain, untracked in tenant map, if we fail here.
+ // See https://github.com/neondatabase/neon/issues/4233
+
let crated_tenant_id = created_tenant.tenant_id();
anyhow::ensure!(
tenant_id == crated_tenant_id,
@@ -466,19 +472,32 @@ pub async fn list_tenants() -> Result, TenantMapLis
pub async fn attach_tenant(
conf: &'static PageServerConf,
tenant_id: TenantId,
+ tenant_conf: TenantConfOpt,
remote_storage: GenericRemoteStorage,
ctx: &RequestContext,
) -> Result<(), TenantMapInsertError> {
tenant_map_insert(tenant_id, |vacant_entry| {
- let tenant_path = conf.tenant_path(&tenant_id);
+ let tenant_dir = create_tenant_files(conf, tenant_conf, tenant_id, CreateTenantFilesMode::Attach)?;
+ // TODO: tenant directory remains on disk if we bail out from here on.
+ // See https://github.com/neondatabase/neon/issues/4233
+
+ // Without the attach marker, schedule_local_tenant_processing will treat the attached tenant as fully attached
+ let marker_file_exists = conf
+ .tenant_attaching_mark_file_path(&tenant_id)
+ .try_exists()
+ .context("check for attach marker file existence")?;
+ anyhow::ensure!(marker_file_exists, "create_tenant_files should have created the attach marker file");
+
+ let attached_tenant = schedule_local_tenant_processing(conf, &tenant_dir, Some(remote_storage), ctx)?;
+ // TODO: tenant object & its background loops remain, untracked in tenant map, if we fail here.
+ // See https://github.com/neondatabase/neon/issues/4233
+
+ let attached_tenant_id = attached_tenant.tenant_id();
anyhow::ensure!(
- !tenant_path.exists(),
- "Cannot attach tenant {tenant_id}, local tenant directory already exists"
+ tenant_id == attached_tenant_id,
+ "loaded created tenant has unexpected tenant id (expect {tenant_id} != actual {attached_tenant_id})",
);
-
- let tenant =
- Tenant::spawn_attach(conf, tenant_id, remote_storage, ctx).context("spawn_attach")?;
- vacant_entry.insert(tenant);
+ vacant_entry.insert(Arc::clone(&attached_tenant));
Ok(())
})
.await
diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py
index cce9cdc17529..02f1aac99ca6 100644
--- a/test_runner/regress/test_remote_storage.py
+++ b/test_runner/regress/test_remote_storage.py
@@ -83,9 +83,7 @@ def test_remote_storage_backup_and_restore(
env.pageserver.allowed_errors.append(".*failed to load remote timeline.*")
# we have a bunch of pytest.raises for these below
env.pageserver.allowed_errors.append(".*tenant .*? already exists, state:.*")
- env.pageserver.allowed_errors.append(
- ".*Cannot attach tenant .*?, local tenant directory already exists.*"
- )
+ env.pageserver.allowed_errors.append(".*tenant directory already exists.*")
env.pageserver.allowed_errors.append(".*simulated failure of remote operation.*")
pageserver_http = env.pageserver.http_client()
diff --git a/test_runner/regress/test_tenant_detach.py b/test_runner/regress/test_tenant_detach.py
index 847ae4b2b84f..82664cff94f7 100644
--- a/test_runner/regress/test_tenant_detach.py
+++ b/test_runner/regress/test_tenant_detach.py
@@ -685,12 +685,10 @@ def test_load_attach_negatives(
pageserver_http.tenant_ignore(tenant_id)
- env.pageserver.allowed_errors.append(
- ".*Cannot attach tenant .*?, local tenant directory already exists.*"
- )
+ env.pageserver.allowed_errors.append(".*tenant directory already exists.*")
with pytest.raises(
expected_exception=PageserverApiException,
- match=f"Cannot attach tenant {tenant_id}, local tenant directory already exists",
+ match="tenant directory already exists",
):
pageserver_http.tenant_attach(tenant_id)
@@ -734,12 +732,10 @@ def test_ignore_while_attaching(
pageserver_http.tenant_ignore(tenant_id)
# Cannot attach it due to some local files existing
- env.pageserver.allowed_errors.append(
- ".*Cannot attach tenant .*?, local tenant directory already exists.*"
- )
+ env.pageserver.allowed_errors.append(".*tenant directory already exists.*")
with pytest.raises(
expected_exception=PageserverApiException,
- match=f"Cannot attach tenant {tenant_id}, local tenant directory already exists",
+ match="tenant directory already exists",
):
pageserver_http.tenant_attach(tenant_id)