Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement mkdir #202

Merged
merged 5 commits into from
Apr 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions doc/SEMANTICS.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ Space allocation (`fallocate`, `posix_fallocate`) is not supported.

Basic read-only directory operations (`opendir`, `readdir`, `closedir`) are supported.

Creating directories (`mkdir`) is not currently supported, but will be [in the future](https://github.com/awslabs/mountpoint-s3/issues/77):

* `mkdir` will create a new empty directory in the file system, but not affect the S3 bucket.
* Note that this is different from e.g. the S3 Console, which creates "directory markers" (i.e. zero-byte objects with `<directory-name>/` key) in the bucket.
* If a file is created under the new (or a nested) directory and committed to S3, Mountpoint for Amazon S3 will revert to using the default mapping of S3 object keys. This implies that the directory will be visible as long as there are keys which contain it as a prefix.

Renaming files and directories (`rename`, `renameat`) is not currently supported.

File deletion (`unlink`) is not currently supported, but will be [in the future](https://github.com/awslabs/mountpoint-s3/issues/78).
Expand Down
11 changes: 11 additions & 0 deletions mountpoint-s3-client/src/mock_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,17 @@ impl MockClient {
/// Remove the object with the given key from this mock client's bucket, if present.
pub fn remove_object(&self, key: &str) {
    let mut objects = self.objects.write().unwrap();
    objects.remove(key);
}

/// Returns `true` if this mock client's bucket contains an object with the specified key
pub fn contains_key(&self, key: &str) -> bool {
    let objects = self.objects.read().unwrap();
    objects.contains_key(key)
}

/// Returns `true` if this mock client's bucket contains the specified common prefix
/// (i.e. at least one key starts with `prefix` followed by a `/` delimiter)
pub fn contains_prefix(&self, prefix: &str) -> bool {
    let dir_prefix = format!("{prefix}/");
    let objects = self.objects.read().unwrap();
    objects.keys().any(|key| key.starts_with(&dir_prefix))
}
}

pub struct MockObject {
Expand Down
25 changes: 24 additions & 1 deletion mountpoint-s3/src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,30 @@ where
return Err(libc::EINVAL);
}

let lookup = self.superblock.create(&self.client, parent, name).await?;
let lookup = self
.superblock
.create(&self.client, parent, name, InodeKind::File)
.await?;
let attr = self.make_attr(&lookup);

Ok(Entry {
ttl: self.config.stat_ttl,
attr,
generation: 0,
})
}

pub async fn mkdir(
&self,
parent: InodeNo,
name: &OsStr,
_mode: libc::mode_t,
_umask: u32,
) -> Result<Entry, libc::c_int> {
let lookup = self
.superblock
.create(&self.client, parent, name, InodeKind::Directory)
.await?;
let attr = self.make_attr(&lookup);

Ok(Entry {
Expand Down
11 changes: 11 additions & 0 deletions mountpoint-s3/src/fuse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,17 @@ where
}
}

#[instrument(level="debug", skip_all, fields(req=_req.unique(), parent=parent, name=?name))]
fn mkdir(&self, _req: &Request<'_>, parent: u64, name: &OsStr, mode: u32, umask: u32, reply: ReplyEntry) {
    // mode_t is u32 on Linux but u16 on macOS, so cast it here
    let mode = mode as libc::mode_t;

    // Drive the async filesystem mkdir to completion and translate the
    // result into a FUSE reply (entry on success, errno on failure).
    let result = block_on(self.fs.mkdir(parent, name, mode, umask).in_current_span());
    match result {
        Ok(entry) => reply.entry(&entry.ttl, &entry.attr, entry.generation),
        Err(errno) => reply.error(errno),
    }
}

#[instrument(level="debug", skip_all, fields(req=_req.unique(), ino=ino, fh=fh, offset=offset, length=data.len()))]
fn write(
&self,
Expand Down
199 changes: 167 additions & 32 deletions mountpoint-s3/src/inode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,7 @@ impl Superblock {
sync: RwLock::new(InodeState {
stat: InodeStat::for_directory(mount_time, Instant::now()), // TODO expiry
write_status: WriteStatus::Remote,
kind_data: InodeKindData::Directory {
children: Default::default(),
writing_children: Default::default(),
},
kind_data: InodeKindData::default_for(InodeKind::Directory),
}),
};
let root = Inode { inner: Arc::new(root) };
Expand Down Expand Up @@ -312,12 +309,13 @@ impl Superblock {
})
}

/// Create a new regular file inode ready to be opened in write-only mode
/// Create a new regular file or directory inode ready to be opened in write-only mode
pub async fn create<OC: ObjectClient>(
&self,
client: &OC,
dir: InodeNo,
name: &OsStr,
kind: InodeKind,
) -> Result<LookedUp, InodeError> {
trace!(parent=?dir, ?name, "create");

Expand Down Expand Up @@ -347,15 +345,15 @@ impl Superblock {
}

let expiry = Instant::now(); // TODO local inode stats never expire?
// Objects don't have an ETag until they are uploaded to S3
let stat = InodeStat::for_file(0, OffsetDateTime::now_utc(), expiry, None);
let kind = InodeKind::File;
let stat = match kind {
InodeKind::File => InodeStat::for_file(0, OffsetDateTime::now_utc(), expiry, None), // Objects don't have an ETag until they are uploaded to S3
InodeKind::Directory => InodeStat::for_directory(self.inner.mount_time, expiry),
};
let state = InodeState {
stat: stat.clone(),
kind_data: InodeKindData::File {},
kind_data: InodeKindData::default_for(kind),
write_status: WriteStatus::LocalUnopened,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure what status we should be using, but we might want to update this to WriteStatus::Remote when the first file is created.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this transition is the tricky one to think through -- a local directory becomes remote when any child (including recursive children) finishes writing. So the finish_writing path on a file probably needs to walk up the directory tree until it finds an already remote directory?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Transition from local to remote implemented in finish_writing of nested files.

};

let inode = self
.inner
.create_inode_locked(&parent_inode, &mut parent_state, name, kind, state, true)?;
Expand Down Expand Up @@ -427,17 +425,9 @@ impl SuperblockInner {
}
}
UpdateStatus::RemoteKey(RemoteLookup { stat, kind }) => {
let kind_data = match kind {
InodeKind::File => InodeKindData::File {},
InodeKind::Directory => InodeKindData::Directory {
children: Default::default(),
writing_children: Default::default(),
},
};

let state = InodeState {
stat: stat.clone(),
kind_data,
kind_data: InodeKindData::default_for(kind),
write_status: WriteStatus::Remote,
};
self.create_inode_locked(&parent, &mut parent_state, name, kind, state, false)
Expand Down Expand Up @@ -622,26 +612,54 @@ impl WriteHandle {
}
}

/// Update status of the inode and its parent
/// Update status of the inode and of containing "local" directories.
pub fn finish_writing(self, object_size: usize) -> Result<(), InodeError> {
let inode = self.inner.get(self.ino)?;
let parent = self.inner.get(self.parent_ino)?;

// acquire a lock on the parent first
let mut parent_state = parent.inner.sync.write().unwrap();
// Collect ancestor inodes that may need updating,
// from parent to first remote ancestor.
let ancestors = {
let mut ancestors = Vec::new();
let mut ancestor_ino = self.parent_ino;
let mut visited = HashSet::new();
loop {
assert!(visited.insert(ancestor_ino), "cycle detected in inode ancestors");
let ancestor = self.inner.get(ancestor_ino)?;
ancestors.push(ancestor.clone());
if ancestor.ino() == ROOT_INODE_NO
|| ancestor.inner.sync.read().unwrap().write_status == WriteStatus::Remote
{
break;
}
ancestor_ino = ancestor.parent();
Comment on lines +627 to +634
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be worth a little bit of infinite loop paranoia here: track a set of seen ancestors and assert! that we never visit an ancestor we've already seen. That should be impossible today, but can imagine it changing if we ever did symlinks, for example.

}
ancestors
};

// Acquire locks on ancestors in descending order to avoid deadlocks.
let mut ancestors_states: Vec<_> = ancestors
.iter()
.rev()
.map(|inode| inode.inner.sync.write().unwrap())
.collect();

let mut state = inode.inner.sync.write().unwrap();
match state.write_status {
WriteStatus::LocalOpen => {
state.write_status = WriteStatus::Remote;
state.stat.size = object_size;
match &mut parent_state.kind_data {
InodeKindData::File { .. } => unreachable!("we know parent is a directory"),
InodeKindData::Directory {
children: _,
writing_children,
} => {
writing_children.remove(&inode.ino());

// Walk up the ancestors from parent to first remote ancestor to transition
// the inode and all "local" containing directories to "remote".
let children_inos = std::iter::once(self.ino).chain(ancestors.iter().map(|ancestor| ancestor.ino()));
for (ancestor_state, child_ino) in ancestors_states.iter_mut().rev().zip(children_inos) {
match &mut ancestor_state.kind_data {
InodeKindData::File { .. } => unreachable!("we know the ancestor is a directory"),
InodeKindData::Directory { writing_children, .. } => {
writing_children.remove(&child_ino);
}
}
ancestor_state.write_status = WriteStatus::Remote;
}
// TODO force the file to be revalidated the next time it's `stat`ed?
Ok(())
Expand Down Expand Up @@ -943,6 +961,18 @@ enum InodeKindData {
},
}

impl InodeKindData {
fn default_for(kind: InodeKind) -> Self {
match kind {
InodeKind::File => Self::File {},
InodeKind::Directory => Self::Directory {
children: Default::default(),
writing_children: Default::default(),
},
}
}
}

#[derive(Debug, Clone)]
pub struct InodeStat {
#[allow(unused)] // TODO revalidate
Expand Down Expand Up @@ -1246,7 +1276,12 @@ mod tests {
for i in 0..5 {
let filename = format!("file{i}.txt");
let new_inode = superblock
.create(&client, FUSE_ROOT_INODE, OsStr::from_bytes(filename.as_bytes()))
.create(
&client,
FUSE_ROOT_INODE,
OsStr::from_bytes(filename.as_bytes()),
InodeKind::File,
)
.await
.unwrap();
superblock
Expand Down Expand Up @@ -1297,7 +1332,12 @@ mod tests {
for i in 0..5 {
let filename = format!("newfile{i}.txt");
let new_inode = superblock
.create(&client, FUSE_ROOT_INODE, OsStr::from_bytes(filename.as_bytes()))
.create(
&client,
FUSE_ROOT_INODE,
OsStr::from_bytes(filename.as_bytes()),
InodeKind::File,
)
.await
.unwrap();
superblock
Expand All @@ -1318,6 +1358,101 @@ mod tests {
}
}

#[test_case(""; "unprefixed")]
#[test_case("test_prefix/"; "prefixed")]
#[tokio::test]
async fn test_create_local_dir(prefix: &str) {
    let client_config = MockClientConfig {
        bucket: "test_bucket".to_string(),
        part_size: 1024 * 1024,
    };
    let client = Arc::new(MockClient::new(client_config));
    let prefix = Prefix::new(prefix).expect("valid prefix");
    let superblock = Superblock::new("test_bucket", &prefix);

    // Create a directory that exists only locally (no S3 request is made).
    let dirname = "local_dir";
    superblock
        .create(&client, FUSE_ROOT_INODE, dirname.as_ref(), InodeKind::Directory)
        .await
        .unwrap();

    // The new directory should be visible via lookup and flagged as local-only.
    let lookedup = superblock
        .lookup(&client, FUSE_ROOT_INODE, dirname.as_ref())
        .await
        .expect("lookup should succeed on local dirs");
    assert_eq!(
        lookedup.inode.inner.sync.read().unwrap().write_status,
        WriteStatus::LocalUnopened
    );

    // readdir on the root should list exactly the new directory.
    let dir_handle = superblock.readdir(&client, FUSE_ROOT_INODE, 2).await.unwrap();
    let entries = dir_handle.collect(&client).await.unwrap();
    let entry_names = entries.iter().map(|entry| entry.inode.name()).collect::<Vec<_>>();
    assert_eq!(entry_names, vec![dirname]);

    // Check that local directories are not present in the client
    let full_prefix = format!("{prefix}{dirname}");
    assert!(!client.contains_prefix(&full_prefix));
}

#[tokio::test]
async fn test_finish_writing_convert_parent_local_dirs_to_remote() {
    let client_config = MockClientConfig {
        bucket: "test_bucket".to_string(),
        part_size: 1024 * 1024,
    };
    let client = Arc::new(MockClient::new(client_config));
    let superblock = Superblock::new("test_bucket", &Default::default());

    // Build a chain of nested local directories: level0/level1/.../level4.
    let nested_dirs = (0..5).map(|i| format!("level{i}")).collect::<Vec<_>>();
    let mut current_parent_ino = FUSE_ROOT_INODE;
    for dirname in &nested_dirs {
        let dir_lookedup = superblock
            .create(&client, current_parent_ino, dirname.as_ref(), InodeKind::Directory)
            .await
            .unwrap();

        // Every freshly created directory starts out local-only.
        assert_eq!(
            dir_lookedup.inode.inner.sync.read().unwrap().write_status,
            WriteStatus::LocalUnopened
        );

        current_parent_ino = dir_lookedup.inode.ino();
    }
    let leaf_dir_ino = current_parent_ino;

    // Create object under leaf dir
    let filename = "newfile.txt";
    let new_inode = superblock
        .create(
            &client,
            leaf_dir_ino,
            OsStr::from_bytes(filename.as_bytes()),
            InodeKind::File,
        )
        .await
        .unwrap();

    let writehandle = superblock
        .write(&client, new_inode.inode.ino(), leaf_dir_ino)
        .await
        .unwrap();

    // Invoke [finish_writing], without actually adding the
    // object to the client
    writehandle.finish_writing(0).unwrap();

    // All nested dirs disappear
    let dirname = nested_dirs.first().unwrap();
    let lookedup = superblock.lookup(&client, FUSE_ROOT_INODE, dirname.as_ref()).await;
    assert!(matches!(lookedup, Err(InodeError::FileDoesNotExist)));
}

#[tokio::test]
async fn test_inode_reuse() {
let client_config = MockClientConfig {
Expand Down
Loading