Skip to content

Commit

Permalink
Allow configuring hive directory listing cache with table prefixes
Browse files Browse the repository at this point in the history
  • Loading branch information
raunaqmorarka authored and electrum committed Oct 11, 2019
1 parent 9337bd8 commit 01ca48b
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 9 deletions.
12 changes: 12 additions & 0 deletions presto-docs/src/main/sphinx/connector/hive.rst
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,18 @@ Property Name Description

``hive.s3select-pushdown.max-connections`` Maximum number of simultaneously open connections to S3 for 500
:ref:`s3selectpushdown`.

``hive.file-status-cache-tables`` Cache directory listing for specified tables.
Examples: ``schema.table1,schema.table2`` to cache directory
listing only for ``table1`` and ``table2``.
``schema1.*,schema2.*`` to cache directory listing for all
tables in the schemas ``schema1`` and ``schema2``.
``*`` to cache directory listing for all tables.

``hive.file-status-cache-size`` Maximum no. of file status entries cached for a path. 10,00,000

``hive.file-status-cache-expire-time`` Duration of time after a directory listing is cached that it ``1m``
should be automatically removed from cache.
================================================== ============================================================ ============

Hive Thrift Metastore Configuration Properties
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import com.google.common.collect.ImmutableList;
import io.airlift.units.Duration;
import io.prestosql.plugin.hive.metastore.Table;
import io.prestosql.spi.connector.SchemaTableName;
import io.prestosql.spi.connector.SchemaTablePrefix;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
Expand All @@ -32,17 +32,16 @@
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.ImmutableList.toImmutableList;

public class CachingDirectoryLister
implements DirectoryLister
{
private final Cache<Path, List<LocatedFileStatus>> cache;
private final Set<SchemaTableName> tableNames;
private final List<SchemaTablePrefix> tablePrefixes;

@Inject
public CachingDirectoryLister(HiveConfig hiveClientConfig)
Expand All @@ -58,16 +57,24 @@ public CachingDirectoryLister(Duration expireAfterWrite, long maxSize, List<Stri
.expireAfterWrite(expireAfterWrite.toMillis(), TimeUnit.MILLISECONDS)
.recordStats()
.build();
this.tableNames = tables.stream()
this.tablePrefixes = tables.stream()
.map(CachingDirectoryLister::parseTableName)
.collect(Collectors.toSet());
.collect(toImmutableList());
}

private static SchemaTableName parseTableName(String tableName)
private static SchemaTablePrefix parseTableName(String tableName)
{
if (tableName.equals("*")) {
return new SchemaTablePrefix();
}
String[] parts = tableName.split("\\.");
checkArgument(parts.length == 2, "Invalid schemaTableName: %s", tableName);
return new SchemaTableName(parts[0], parts[1]);
String schema = parts[0];
String table = parts[1];
if (table.equals("*")) {
return new SchemaTablePrefix(schema);
}
return new SchemaTablePrefix(schema, table);
}

@Override
Expand All @@ -80,7 +87,7 @@ public RemoteIterator<LocatedFileStatus> list(FileSystem fs, Table table, Path p
}
RemoteIterator<LocatedFileStatus> iterator = fs.listLocatedStatus(path);

if (!tableNames.contains(table.getSchemaTableName())) {
if (tablePrefixes.stream().noneMatch(prefix -> prefix.matches(table.getSchemaTableName()))) {
return iterator;
}
return cachingRemoteIterator(iterator, path);
Expand Down

0 comments on commit 01ca48b

Please sign in to comment.