Skip to content

Commit

Permalink
HADOOP-19278. S3A: Remove option to delete directory markers (apache#…
Browse files Browse the repository at this point in the history
…7052)

S3A no longer supports the ability to disable deletion of parent directory
markers on file or directory creation.

The option "fs.s3a.directory.marker.retention" is no longer supported.

This is incompatible with all hadoop versions before 3.2.2 and with hadoop 3.3.0
when applications using the s3a connector attempt to write to the same repository.
  • Loading branch information
steveloughran authored Jan 8, 2025
1 parent a487209 commit d44ac28
Show file tree
Hide file tree
Showing 66 changed files with 290 additions and 2,875 deletions.
53 changes: 0 additions & 53 deletions hadoop-tools/hadoop-aws/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,6 @@
<!-- Set a longer timeout for integration test (in milliseconds) -->
<test.integration.timeout>200000</test.integration.timeout>

<!-- should directory marker retention be audited? -->
<fs.s3a.directory.marker.audit>false</fs.s3a.directory.marker.audit>
<!-- marker retention policy -->
<fs.s3a.directory.marker.retention></fs.s3a.directory.marker.retention>

<!-- Is prefetch enabled? -->
<fs.s3a.prefetch.enabled>unset</fs.s3a.prefetch.enabled>
Expand Down Expand Up @@ -126,9 +122,6 @@
<fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
<fs.s3a.scale.test.huge.huge.partitionsize>${fs.s3a.scale.test.huge.partitionsize}</fs.s3a.scale.test.huge.huge.partitionsize>
<fs.s3a.scale.test.timeout>${fs.s3a.scale.test.timeout}</fs.s3a.scale.test.timeout>
<!-- Markers-->
<fs.s3a.directory.marker.retention>${fs.s3a.directory.marker.retention}</fs.s3a.directory.marker.retention>
<fs.s3a.directory.marker.audit>${fs.s3a.directory.marker.audit}</fs.s3a.directory.marker.audit>
<!-- Prefetch -->
<fs.s3a.prefetch.enabled>${fs.s3a.prefetch.enabled}</fs.s3a.prefetch.enabled>
</systemPropertyVariables>
Expand Down Expand Up @@ -167,8 +160,6 @@
<fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
<fs.s3a.scale.test.huge.huge.partitionsize>${fs.s3a.scale.test.huge.partitionsize}</fs.s3a.scale.test.huge.huge.partitionsize>
<fs.s3a.scale.test.timeout>${fs.s3a.scale.test.timeout}</fs.s3a.scale.test.timeout>
<fs.s3a.directory.marker.retention>${fs.s3a.directory.marker.retention}</fs.s3a.directory.marker.retention>

<test.default.timeout>${test.integration.timeout}</test.default.timeout>
<!-- Prefetch -->
<fs.s3a.prefetch.enabled>${fs.s3a.prefetch.enabled}</fs.s3a.prefetch.enabled>
Expand Down Expand Up @@ -221,9 +212,6 @@
<fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
<fs.s3a.scale.test.huge.huge.partitionsize>${fs.s3a.scale.test.huge.partitionsize}</fs.s3a.scale.test.huge.huge.partitionsize>
<fs.s3a.scale.test.timeout>${fs.s3a.scale.test.timeout}</fs.s3a.scale.test.timeout>
<!-- Markers-->
<fs.s3a.directory.marker.retention>${fs.s3a.directory.marker.retention}</fs.s3a.directory.marker.retention>
<fs.s3a.directory.marker.audit>${fs.s3a.directory.marker.audit}</fs.s3a.directory.marker.audit>
<!-- Prefetch -->
<fs.s3a.prefetch.enabled>${fs.s3a.prefetch.enabled}</fs.s3a.prefetch.enabled>
<!-- are root tests enabled. Set to false when running parallel jobs on same bucket -->
Expand Down Expand Up @@ -285,9 +273,6 @@
<fs.s3a.scale.test.enabled>${fs.s3a.scale.test.enabled}</fs.s3a.scale.test.enabled>
<fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
<fs.s3a.scale.test.timeout>${fs.s3a.scale.test.timeout}</fs.s3a.scale.test.timeout>
<!-- Markers-->
<fs.s3a.directory.marker.retention>${fs.s3a.directory.marker.retention}</fs.s3a.directory.marker.retention>
<fs.s3a.directory.marker.audit>${fs.s3a.directory.marker.audit}</fs.s3a.directory.marker.audit>
<!-- Prefetch -->
<fs.s3a.prefetch.enabled>${fs.s3a.prefetch.enabled}</fs.s3a.prefetch.enabled>
<test.unique.fork.id>job-${job.id}</test.unique.fork.id>
Expand All @@ -314,44 +299,6 @@
</properties>
</profile>

<!-- Directory marker retention options, all from the -Dmarkers value-->
<profile>
<id>keep-markers</id>
<activation>
<property>
<name>markers</name>
<value>keep</value>
</property>
</activation>
<properties >
<fs.s3a.directory.marker.retention>keep</fs.s3a.directory.marker.retention>
</properties>
</profile>
<profile>
<id>delete-markers</id>
<activation>
<property>
<name>markers</name>
<value>delete</value>
</property>
</activation>
<properties >
<fs.s3a.directory.marker.retention>delete</fs.s3a.directory.marker.retention>
</properties>
</profile>
<profile>
<id>auth-markers</id>
<activation>
<property>
<name>markers</name>
<value>authoritative</value>
</property>
</activation>
<properties >
<fs.s3a.directory.marker.retention>authoritative</fs.s3a.directory.marker.retention>
</properties>
</profile>

<!-- Turn on prefetching-->
<profile>
<id>prefetch</id>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,7 @@ private Constants() {
"fs.s3a." + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase()
+ ".signing-algorithm";

@Deprecated
public static final String S3N_FOLDER_SUFFIX = "_$folder$";
public static final String FS_S3A_BLOCK_SIZE = "fs.s3a.block.size";
public static final String FS_S3A = "s3a";
Expand All @@ -868,10 +869,13 @@ private Constants() {
/**
* Paths considered "authoritative".
* When S3guard was supported, this skipped checks to s3 on directory listings.
* It is also use to optionally disable marker retentation purely on these
* paths -a feature which is still retained/available.
* It was also possible to use this to optionally disable marker retention purely on these
* paths - a feature which is no longer available.
* As no feature uses this any more, it is declared as deprecated.
* */
@Deprecated
public static final String AUTHORITATIVE_PATH = "fs.s3a.authoritative.path";
@Deprecated
public static final String[] DEFAULT_AUTHORITATIVE_PATH = {};

/**
Expand Down Expand Up @@ -1339,45 +1343,52 @@ private Constants() {

/**
* Policy for directory markers.
* This is a new feature of HADOOP-13230 which addresses
* some scale, performance and permissions issues -but
* at the risk of backwards compatibility.
* No longer supported as "keep" is the sole policy.
*/
@Deprecated
public static final String DIRECTORY_MARKER_POLICY =
"fs.s3a.directory.marker.retention";

/**
* Delete directory markers. This is the backwards compatible option.
* Delete directory markers.
* No longer supported as "keep" is the sole policy.
* Value: {@value}.
*/
@Deprecated
public static final String DIRECTORY_MARKER_POLICY_DELETE =
"delete";

/**
* Retain directory markers.
* No longer needed, so marked as deprecated to flag usages.
* Value: {@value}.
*/
@Deprecated
public static final String DIRECTORY_MARKER_POLICY_KEEP =
"keep";

/**
* Retain directory markers in authoritative directory trees only.
* No longer required as "keep" is the sole policy.
* Value: {@value}.
*/
@Deprecated
public static final String DIRECTORY_MARKER_POLICY_AUTHORITATIVE =
"authoritative";

/**
* Default retention policy: {@value}.
* No longer required as "keep" is the sole policy.
*/
@Deprecated
public static final String DEFAULT_DIRECTORY_MARKER_POLICY =
DIRECTORY_MARKER_POLICY_KEEP;


/**
* {@code PathCapabilities} probe to verify that an S3A Filesystem
* has the changes needed to safely work with buckets where
* directoy markers have not been deleted.
* directory markers have not been deleted.
* Value: {@value}.
*/
public static final String STORE_CAPABILITY_DIRECTORY_MARKER_AWARE
Expand All @@ -1394,23 +1405,28 @@ private Constants() {
/**
* {@code PathCapabilities} probe to indicate that the filesystem
* deletes directory markers.
* Always false.
* Value: {@value}.
*/
@Deprecated
public static final String STORE_CAPABILITY_DIRECTORY_MARKER_POLICY_DELETE
= "fs.s3a.capability.directory.marker.policy.delete";

/**
* {@code PathCapabilities} probe to indicate that the filesystem
* keeps directory markers in authoritative paths only.
* This probe always returns false.
* Value: {@value}.
*/
@Deprecated
public static final String
STORE_CAPABILITY_DIRECTORY_MARKER_POLICY_AUTHORITATIVE =
"fs.s3a.capability.directory.marker.policy.authoritative";

/**
* {@code PathCapabilities} probe to indicate that a path
* keeps directory markers.
* This probe always returns true.
* Value: {@value}.
*/
public static final String STORE_CAPABILITY_DIRECTORY_MARKER_ACTION_KEEP
Expand All @@ -1419,6 +1435,7 @@ private Constants() {
/**
* {@code PathCapabilities} probe to indicate that a path
* deletes directory markers.
* This probe always returns false.
* Value: {@value}.
*/
public static final String STORE_CAPABILITY_DIRECTORY_MARKER_ACTION_DELETE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,7 @@
import java.util.concurrent.CompletableFuture;
import java.util.StringJoiner;

import static org.apache.hadoop.fs.s3a.Constants.S3N_FOLDER_SUFFIX;
import static org.apache.hadoop.fs.s3a.Invoker.onceInTheFuture;
import static org.apache.hadoop.fs.s3a.S3AUtils.ACCEPT_ALL;
import static org.apache.hadoop.fs.s3a.S3AUtils.createFileStatus;
import static org.apache.hadoop.fs.s3a.S3AUtils.maybeAddTrailingSlash;
import static org.apache.hadoop.fs.s3a.S3AUtils.objectRepresentsDirectory;
Expand All @@ -76,8 +74,8 @@ public class Listing extends AbstractStoreOperation {

private static final Logger LOG = S3AFileSystem.LOG;

static final FileStatusAcceptor ACCEPT_ALL_BUT_S3N =
new AcceptAllButS3nDirs();
static final FileStatusAcceptor ACCEPT_ALL_OBJECTS =
new AcceptAllObjects();

private final ListingOperationCallbacks listingOperationCallbacks;

Expand Down Expand Up @@ -116,7 +114,7 @@ public static RemoteIterator<S3AFileStatus> toProvidedFileStatusIterator(
S3AFileStatus[] fileStatuses) {
return filteringRemoteIterator(
remoteIteratorFromArray(fileStatuses),
Listing.ACCEPT_ALL_BUT_S3N::accept);
Listing.ACCEPT_ALL_OBJECTS::accept);
}

/**
Expand All @@ -132,7 +130,7 @@ public static RemoteIterator<S3AFileStatus> toProvidedFileStatusIterator(
* @throws IOException IO Problems
*/
@Retries.RetryRaw
public FileStatusListingIterator createFileStatusListingIterator(
public RemoteIterator<S3AFileStatus> createFileStatusListingIterator(
Path listPath,
S3ListRequest request,
PathFilter filter,
Expand Down Expand Up @@ -212,7 +210,7 @@ public RemoteIterator<S3ALocatedFileStatus> getListFilesAssumingDir(
.createListObjectsRequest(key,
delimiter,
span),
ACCEPT_ALL,
S3AUtils.ACCEPT_ALL,
acceptor,
span));
}
Expand All @@ -235,7 +233,7 @@ public RemoteIterator<S3ALocatedFileStatus> getLocatedFileStatusIteratorForDir(
listingOperationCallbacks
.createListObjectsRequest(key, "/", span),
filter,
new AcceptAllButSelfAndS3nDirs(dir),
new AcceptAllButSelf(dir),
span));
}

Expand Down Expand Up @@ -263,8 +261,8 @@ public RemoteIterator<S3ALocatedFileStatus> getLocatedFileStatusIteratorForDir(
return createFileStatusListingIterator(
path,
request,
ACCEPT_ALL,
new AcceptAllButSelfAndS3nDirs(path),
S3AUtils.ACCEPT_ALL,
new AcceptAllButSelf(path),
span);
}

Expand Down Expand Up @@ -462,7 +460,7 @@ private boolean buildNextStatusBatch(S3ListResult objects) throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("{}: {}", keyPath, stringify(s3Object));
}
// Skip over keys that are ourselves and old S3N _$folder$ files
// Skip over keys that are ourselves
if (acceptor.accept(keyPath, s3Object) && filter.accept(keyPath)) {
S3AFileStatus status = createFileStatus(keyPath, s3Object,
blockSize, userName, s3Object.eTag(),
Expand Down Expand Up @@ -722,13 +720,12 @@ public void close() {
}

/**
* Accept all entries except the base path and those which map to S3N
* pseudo directory markers.
* Accept all entries except the base path.
*/
static class AcceptFilesOnly implements FileStatusAcceptor {
private final Path qualifiedPath;

public AcceptFilesOnly(Path qualifiedPath) {
AcceptFilesOnly(Path qualifiedPath) {
this.qualifiedPath = qualifiedPath;
}

Expand All @@ -743,7 +740,6 @@ public AcceptFilesOnly(Path qualifiedPath) {
@Override
public boolean accept(Path keyPath, S3Object s3Object) {
return !keyPath.equals(qualifiedPath)
&& !s3Object.key().endsWith(S3N_FOLDER_SUFFIX)
&& !objectRepresentsDirectory(s3Object.key());
}

Expand All @@ -765,29 +761,28 @@ public boolean accept(FileStatus status) {
}

/**
* Accept all entries except those which map to S3N pseudo directory markers.
* Accept all entries.
*/
static class AcceptAllButS3nDirs implements FileStatusAcceptor {
static class AcceptAllObjects implements FileStatusAcceptor {

public boolean accept(Path keyPath, S3Object s3Object) {
return !s3Object.key().endsWith(S3N_FOLDER_SUFFIX);
return true;
}

public boolean accept(Path keyPath, String prefix) {
return !keyPath.toString().endsWith(S3N_FOLDER_SUFFIX);
return true;
}

public boolean accept(FileStatus status) {
return !status.getPath().toString().endsWith(S3N_FOLDER_SUFFIX);
return true;
}

}

/**
* Accept all entries except the base path and those which map to S3N
* pseudo directory markers.
* Accept all entries except the base path.
*/
public static class AcceptAllButSelfAndS3nDirs implements FileStatusAcceptor {
public static class AcceptAllButSelf implements FileStatusAcceptor {

/** Base path. */
private final Path qualifiedPath;
Expand All @@ -796,22 +791,20 @@ public static class AcceptAllButSelfAndS3nDirs implements FileStatusAcceptor {
* Constructor.
* @param qualifiedPath an already-qualified path.
*/
public AcceptAllButSelfAndS3nDirs(Path qualifiedPath) {
public AcceptAllButSelf(Path qualifiedPath) {
this.qualifiedPath = qualifiedPath;
}

/**
* Reject a s3Object entry if the key path is the qualified Path, or
* it ends with {@code "_$folder$"}.
* Reject a s3Object entry if the key path is the qualified Path.
* @param keyPath key path of the entry
* @param s3Object s3Object entry
* @return true if the entry is accepted (i.e. that a status entry
* should be generated.)
*/
@Override
public boolean accept(Path keyPath, S3Object s3Object) {
return !keyPath.equals(qualifiedPath) &&
!s3Object.key().endsWith(S3N_FOLDER_SUFFIX);
return !keyPath.equals(qualifiedPath);
}

/**
Expand Down
Loading

0 comments on commit d44ac28

Please sign in to comment.