-
Notifications
You must be signed in to change notification settings - Fork 848
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Generate ETag
s for InMemory
and LocalFileSystem
(#4879)
#4922
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -690,12 +690,28 @@ pub struct GetOptions { | |
/// Request will succeed if the `ObjectMeta::e_tag` matches | ||
/// otherwise returning [`Error::Precondition`] | ||
/// | ||
/// <https://datatracker.ietf.org/doc/html/rfc9110#name-if-match> | ||
/// See <https://datatracker.ietf.org/doc/html/rfc9110#name-if-match> | ||
/// | ||
/// Examples: | ||
/// | ||
/// ```text | ||
/// If-Match: "xyzzy" | ||
/// If-Match: "xyzzy", "r2d2xxxx", "c3piozzzz" | ||
/// If-Match: * | ||
/// ``` | ||
pub if_match: Option<String>, | ||
/// Request will succeed if the `ObjectMeta::e_tag` does not match | ||
/// otherwise returning [`Error::NotModified`] | ||
/// | ||
/// <https://datatracker.ietf.org/doc/html/rfc9110#section-13.1.2> | ||
/// See <https://datatracker.ietf.org/doc/html/rfc9110#section-13.1.2> | ||
/// | ||
/// Examples: | ||
/// | ||
/// ```text | ||
/// If-None-Match: "xyzzy" | ||
/// If-None-Match: "xyzzy", "r2d2xxxx", "c3piozzzz" | ||
/// If-None-Match: * | ||
/// ``` | ||
pub if_none_match: Option<String>, | ||
/// Request will succeed if the object has been modified since | ||
/// | ||
|
@@ -718,25 +734,41 @@ pub struct GetOptions { | |
|
||
impl GetOptions { | ||
/// Returns an error if the modification conditions on this request are not satisfied | ||
fn check_modified( | ||
&self, | ||
location: &Path, | ||
last_modified: DateTime<Utc>, | ||
) -> Result<()> { | ||
if let Some(date) = self.if_modified_since { | ||
if last_modified <= date { | ||
return Err(Error::NotModified { | ||
path: location.to_string(), | ||
source: format!("{} >= {}", date, last_modified).into(), | ||
/// | ||
/// <https://datatracker.ietf.org/doc/html/rfc7232#section-6> | ||
fn check_preconditions(&self, meta: &ObjectMeta) -> Result<()> { | ||
// The use of the invalid etag "*" means no ETag is equivalent to never matching | ||
let etag = meta.e_tag.as_deref().unwrap_or("*"); | ||
let last_modified = meta.last_modified; | ||
|
||
if let Some(m) = &self.if_match { | ||
if m != "*" && m.split(',').map(str::trim).all(|x| x != etag) { | ||
return Err(Error::Precondition { | ||
path: meta.location.to_string(), | ||
source: format!("{etag} does not match {m}").into(), | ||
}); | ||
} | ||
} | ||
|
||
if let Some(date) = self.if_unmodified_since { | ||
} else if let Some(date) = self.if_unmodified_since { | ||
if last_modified > date { | ||
return Err(Error::Precondition { | ||
path: location.to_string(), | ||
source: format!("{} < {}", date, last_modified).into(), | ||
path: meta.location.to_string(), | ||
source: format!("{date} < {last_modified}").into(), | ||
}); | ||
} | ||
} | ||
|
||
if let Some(m) = &self.if_none_match { | ||
if m == "*" || m.split(',').map(str::trim).any(|x| x == etag) { | ||
return Err(Error::NotModified { | ||
path: meta.location.to_string(), | ||
source: format!("{etag} matches {m}").into(), | ||
}); | ||
} | ||
} else if let Some(date) = self.if_modified_since { | ||
if last_modified <= date { | ||
return Err(Error::NotModified { | ||
path: meta.location.to_string(), | ||
source: format!("{date} >= {last_modified}").into(), | ||
}); | ||
} | ||
} | ||
|
@@ -940,6 +972,7 @@ mod test_util { | |
mod tests { | ||
use super::*; | ||
use crate::test_util::flatten_list_stream; | ||
use chrono::TimeZone; | ||
use rand::{thread_rng, Rng}; | ||
use tokio::io::AsyncWriteExt; | ||
|
||
|
@@ -1347,33 +1380,32 @@ mod tests { | |
Err(e) => panic!("{e}"), | ||
} | ||
|
||
if let Some(tag) = meta.e_tag { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I opted to make these tests "mandatory" as now all stores should support them |
||
let options = GetOptions { | ||
if_match: Some(tag.clone()), | ||
..GetOptions::default() | ||
}; | ||
storage.get_opts(&path, options).await.unwrap(); | ||
|
||
let options = GetOptions { | ||
if_match: Some("invalid".to_string()), | ||
..GetOptions::default() | ||
}; | ||
let err = storage.get_opts(&path, options).await.unwrap_err(); | ||
assert!(matches!(err, Error::Precondition { .. }), "{err}"); | ||
|
||
let options = GetOptions { | ||
if_none_match: Some(tag.clone()), | ||
..GetOptions::default() | ||
}; | ||
let err = storage.get_opts(&path, options).await.unwrap_err(); | ||
assert!(matches!(err, Error::NotModified { .. }), "{err}"); | ||
|
||
let options = GetOptions { | ||
if_none_match: Some("invalid".to_string()), | ||
..GetOptions::default() | ||
}; | ||
storage.get_opts(&path, options).await.unwrap(); | ||
} | ||
let tag = meta.e_tag.unwrap(); | ||
let options = GetOptions { | ||
if_match: Some(tag.clone()), | ||
..GetOptions::default() | ||
}; | ||
storage.get_opts(&path, options).await.unwrap(); | ||
|
||
let options = GetOptions { | ||
if_match: Some("invalid".to_string()), | ||
..GetOptions::default() | ||
}; | ||
let err = storage.get_opts(&path, options).await.unwrap_err(); | ||
assert!(matches!(err, Error::Precondition { .. }), "{err}"); | ||
|
||
let options = GetOptions { | ||
if_none_match: Some(tag.clone()), | ||
..GetOptions::default() | ||
}; | ||
let err = storage.get_opts(&path, options).await.unwrap_err(); | ||
assert!(matches!(err, Error::NotModified { .. }), "{err}"); | ||
|
||
let options = GetOptions { | ||
if_none_match: Some("invalid".to_string()), | ||
..GetOptions::default() | ||
}; | ||
storage.get_opts(&path, options).await.unwrap(); | ||
} | ||
|
||
/// Returns a chunk of length `chunk_length` | ||
|
@@ -1685,8 +1717,86 @@ mod tests { | |
assert!(stream.next().await.is_none()); | ||
} | ||
|
||
// Tests TODO: | ||
// GET nonexisting location (in_memory/file) | ||
// DELETE nonexisting location | ||
// PUT overwriting | ||
#[test] | ||
fn test_preconditions() { | ||
let mut meta = ObjectMeta { | ||
location: Path::from("test"), | ||
last_modified: Utc.timestamp_nanos(100), | ||
size: 100, | ||
e_tag: Some("123".to_string()), | ||
}; | ||
|
||
let mut options = GetOptions::default(); | ||
options.check_preconditions(&meta).unwrap(); | ||
|
||
options.if_modified_since = Some(Utc.timestamp_nanos(50)); | ||
options.check_preconditions(&meta).unwrap(); | ||
|
||
options.if_modified_since = Some(Utc.timestamp_nanos(100)); | ||
options.check_preconditions(&meta).unwrap_err(); | ||
|
||
options.if_modified_since = Some(Utc.timestamp_nanos(101)); | ||
options.check_preconditions(&meta).unwrap_err(); | ||
|
||
options = GetOptions::default(); | ||
|
||
options.if_unmodified_since = Some(Utc.timestamp_nanos(50)); | ||
options.check_preconditions(&meta).unwrap_err(); | ||
|
||
options.if_unmodified_since = Some(Utc.timestamp_nanos(100)); | ||
options.check_preconditions(&meta).unwrap(); | ||
|
||
options.if_unmodified_since = Some(Utc.timestamp_nanos(101)); | ||
options.check_preconditions(&meta).unwrap(); | ||
|
||
options = GetOptions::default(); | ||
|
||
options.if_match = Some("123".to_string()); | ||
options.check_preconditions(&meta).unwrap(); | ||
|
||
options.if_match = Some("123,354".to_string()); | ||
options.check_preconditions(&meta).unwrap(); | ||
|
||
options.if_match = Some("354, 123,".to_string()); | ||
options.check_preconditions(&meta).unwrap(); | ||
|
||
options.if_match = Some("354".to_string()); | ||
options.check_preconditions(&meta).unwrap_err(); | ||
|
||
options.if_match = Some("*".to_string()); | ||
options.check_preconditions(&meta).unwrap(); | ||
|
||
// If-Match takes precedence | ||
options.if_unmodified_since = Some(Utc.timestamp_nanos(200)); | ||
options.check_preconditions(&meta).unwrap(); | ||
|
||
options = GetOptions::default(); | ||
|
||
options.if_none_match = Some("123".to_string()); | ||
options.check_preconditions(&meta).unwrap_err(); | ||
|
||
options.if_none_match = Some("*".to_string()); | ||
options.check_preconditions(&meta).unwrap_err(); | ||
|
||
options.if_none_match = Some("1232".to_string()); | ||
options.check_preconditions(&meta).unwrap(); | ||
|
||
options.if_none_match = Some("23, 123".to_string()); | ||
options.check_preconditions(&meta).unwrap_err(); | ||
|
||
// If-None-Match takes precedence | ||
options.if_modified_since = Some(Utc.timestamp_nanos(10)); | ||
options.check_preconditions(&meta).unwrap_err(); | ||
|
||
// Check missing ETag | ||
meta.e_tag = None; | ||
options = GetOptions::default(); | ||
|
||
options.if_none_match = Some("*".to_string()); // Fails if any file exists | ||
options.check_preconditions(&meta).unwrap_err(); | ||
|
||
options = GetOptions::default(); | ||
options.if_match = Some("*".to_string()); // Passes if file exists | ||
options.check_preconditions(&meta).unwrap(); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -365,23 +365,12 @@ impl ObjectStore for LocalFileSystem { | |
} | ||
|
||
async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult> { | ||
if options.if_match.is_some() || options.if_none_match.is_some() { | ||
return Err(super::Error::NotSupported { | ||
source: "ETags not supported by LocalFileSystem".to_string().into(), | ||
}); | ||
} | ||
|
||
let location = location.clone(); | ||
let path = self.config.path_to_filesystem(&location)?; | ||
maybe_spawn_blocking(move || { | ||
let (file, metadata) = open_file(&path)?; | ||
if options.if_unmodified_since.is_some() | ||
|| options.if_modified_since.is_some() | ||
{ | ||
options.check_modified(&location, last_modified(&metadata))?; | ||
} | ||
|
||
let meta = convert_metadata(metadata, location)?; | ||
options.check_preconditions(&meta)?; | ||
|
||
Ok(GetResult { | ||
payload: GetResultPayload::File(file, path), | ||
|
@@ -994,7 +983,7 @@ fn convert_entry(entry: DirEntry, location: Path) -> Result<ObjectMeta> { | |
convert_metadata(metadata, location) | ||
} | ||
|
||
fn last_modified(metadata: &std::fs::Metadata) -> DateTime<Utc> { | ||
fn last_modified(metadata: &Metadata) -> DateTime<Utc> { | ||
metadata | ||
.modified() | ||
.expect("Modified file time should be supported on this platform") | ||
|
@@ -1006,15 +995,35 @@ fn convert_metadata(metadata: Metadata, location: Path) -> Result<ObjectMeta> { | |
let size = usize::try_from(metadata.len()).context(FileSizeOverflowedUsizeSnafu { | ||
path: location.as_ref(), | ||
})?; | ||
let inode = get_inode(&metadata); | ||
let mtime = last_modified.timestamp_micros(); | ||
|
||
// Use an ETag scheme based on that used by many popular HTTP servers | ||
// <https://httpd.apache.org/docs/2.2/mod/core.html#fileetag> | ||
// <https://stackoverflow.com/questions/47512043/how-etags-are-generated-and-configured> | ||
let etag = format!("{inode:x}-{mtime:x}-{size:x}"); | ||
|
||
Ok(ObjectMeta { | ||
location, | ||
last_modified, | ||
size, | ||
e_tag: None, | ||
e_tag: Some(etag), | ||
}) | ||
} | ||
|
||
#[cfg(unix)] | ||
/// We include the inode when available to yield an ETag more resistant to collisions | ||
/// and as used by popular web servers such as [Apache](https://httpd.apache.org/docs/2.2/mod/core.html#fileetag) | ||
fn get_inode(metadata: &Metadata) -> u64 { | ||
std::os::unix::fs::MetadataExt::ino(metadata) | ||
} | ||
|
||
#[cfg(not(unix))] | ||
/// On platforms where an inode isn't available, fallback to just relying on size and mtime | ||
fn get_inode(metadata: &Metadata) -> u64 { | ||
0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The inode isn't strictly necessary, it is still a pretty strong ETag without it, but on Unix systems we might as well make use of it. A happy consequence of the way most operations create a new file and then link it into place, is that the inode is actually a pretty good change detector There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand this comment. If the inode isn't needed, then what value does including it add? Either modification time and size (along with path) uniquely determines the result, or it isn't... Perhaps the inode can detect file modifications that do not change the modification timestamp (some sort of symlink shenanigans, perhaps 🤔 ) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In any event I think this "inode is not necessary" context should be encoded as comments in the source code There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see you have clarified the comment that inode is used to be more resistent to collisions -- sounds good to me |
||
} | ||
|
||
/// Convert walkdir results and converts not-found errors into `None`. | ||
/// Convert broken symlinks to `None`. | ||
fn convert_walkdir_result( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please add an explicit mention of
*
handling as well as support for the,
handling to the docstrings (so someone can see it on docs.rs)?