Skip to content

Commit

Permalink
Merge pull request #7 from jdhok/develop
Browse files Browse the repository at this point in the history
Preserve user specified join type
  • Loading branch information
Jaideep Dhok committed May 5, 2014
2 parents e6a8d12 + 25d778d commit 02a3bb8
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 48 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1009,10 +1009,11 @@ private void appendWhereClause(CubeDimensionTable dim,
LOG.debug("Skipping already added where clause for " + dim);
return;
}
String whereClause = dimStorageTableToWhereClause.get(
String storageCondition = dimStorageTableToWhereClause.get(
storageTableToQuery.get(dim).iterator().next());
if (whereClause != null) {
appendWhereClause(whereString, whereClause, hasMore);

if (storageCondition != null) {
appendWhereClause(whereString, storageCondition, hasMore);
}
}

Expand Down
108 changes: 81 additions & 27 deletions ql/src/java/org/apache/hadoop/hive/ql/cube/parse/JoinResolver.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,26 @@
public class JoinResolver implements ContextRewriter {

private static final Log LOG = LogFactory.getLog(JoinResolver.class);

/**
* Store join chain information resolved by join resolver
*/
public static class AutoJoinContext {
private final Map<CubeDimensionTable, List<TableRelationship>> joinChain;
private final Map<AbstractCubeTable, String> partialJoinConditions;
private final Set<String> partitionPushedTables;
private final boolean partialJoinChains;
private final Map<AbstractCubeTable, JoinType> tableJoinTypeMap;

public AutoJoinContext(Map<CubeDimensionTable, List<TableRelationship>> joinChain,
Map<AbstractCubeTable, String> partialJoinConditions) {
Map<AbstractCubeTable, String> partialJoinConditions,
boolean partialJoinChains,
Map<AbstractCubeTable, JoinType> tableJoinTypeMap) {
this.joinChain = joinChain;
this.partialJoinConditions = partialJoinConditions;
partitionPushedTables = new HashSet<String>();
this.partialJoinChains = partialJoinChains;
this.tableJoinTypeMap = tableJoinTypeMap;
}

public Map<CubeDimensionTable, List<TableRelationship>> getJoinChain() {
Expand All @@ -99,36 +109,28 @@ public String getMergedJoinClause(Configuration conf,
}

Set<String> clauses = new LinkedHashSet<String>();

String joinTypeCfg = conf.get(CubeQueryConfUtil.JOIN_TYPE_KEY);
String joinTypeStr = "";
JoinType joinType = JoinType.INNER;

if (StringUtils.isNotBlank(joinTypeCfg)) {
joinType = JoinType.valueOf(joinTypeCfg.toUpperCase());
switch (joinType) {
case FULLOUTER:
joinTypeStr = "full outer";
break;
case INNER:
joinTypeStr = "inner";
break;
case LEFTOUTER:
joinTypeStr = "left outer";
break;
case LEFTSEMI:
joinTypeStr = "left semi";
break;
case UNIQUE:
joinTypeStr = "unique";
break;
case RIGHTOUTER:
joinTypeStr = "right outer";
break;
// this flag is set to true if user has specified a partial join chain
if (!partialJoinChains) {
// User has not specified any join conditions. In this case, we rely on configuration for the join type
if (StringUtils.isNotBlank(joinTypeCfg)) {
joinType = JoinType.valueOf(joinTypeCfg.toUpperCase());
joinTypeStr = getJoinTypeStr(joinType);
}
}

for (List<TableRelationship> chain : joinChain.values()) {
for (Map.Entry<CubeDimensionTable, List<TableRelationship>> entry : joinChain.entrySet()) {
List<TableRelationship> chain = entry.getValue();
CubeDimensionTable table = entry.getKey();

if (partialJoinChains) {
joinType = tableJoinTypeMap.get(table);
joinTypeStr = getJoinTypeStr(joinType);
}

for (TableRelationship rel : chain) {
StringBuilder clause = new StringBuilder(joinTypeStr)
.append(" join ");
Expand All @@ -148,11 +150,25 @@ public String getMergedJoinClause(Configuration conf,
if (JoinType.INNER == joinType || JoinType.LEFTOUTER == joinType || JoinType.LEFTSEMI == joinType) {
// For inner and left joins push filter of right table
userFilter = partialJoinConditions.get(rel.getToTable());
if (partialJoinConditions.containsKey(rel.getFromTable())) {
if (StringUtils.isNotBlank(userFilter)) {
userFilter += (" AND " + partialJoinConditions.get(rel.getFromTable()));
} else {
userFilter = partialJoinConditions.get(rel.getFromTable());
}
}
storageFilter = getStorageFilter(dimStorageTableToWhereClause, storageTableToQuery, rel.getToTable());
partitionPushedTables.add(rel.getToTable().getName());
} else if (JoinType.RIGHTOUTER == joinType) {
// For right outer joins, push filters of left table
userFilter = partialJoinConditions.get(rel.getFromTable());
if (partialJoinConditions.containsKey(rel.getToTable())) {
if (StringUtils.isNotBlank(userFilter)) {
userFilter += (" AND " + partialJoinConditions.get(rel.getToTable()));
} else {
userFilter = partialJoinConditions.get(rel.getToTable());
}
}
storageFilter = getStorageFilter(dimStorageTableToWhereClause, storageTableToQuery, rel.getFromTable());
partitionPushedTables.add(rel.getFromTable().getName());
} else if (JoinType.FULLOUTER == joinType) {
Expand Down Expand Up @@ -199,6 +215,21 @@ public String getMergedJoinClause(Configuration conf,
return StringUtils.join(clauses, " ");
}

private String getJoinTypeStr(JoinType joinType) {
if (joinType == null) {
return "";
}
switch (joinType) {
case FULLOUTER: return "full outer";
case INNER: return "inner";
case LEFTOUTER: return "left outer";
case LEFTSEMI: return "left semi";
case UNIQUE: return "unique";
case RIGHTOUTER: return "right outer";
default: return "";
}
}

private String getStorageFilter(Map<String, String> dimStorageTableToWhereClause,
Map<AbstractCubeTable, Set<String>> storageTableToQuery,
AbstractCubeTable table) {
Expand Down Expand Up @@ -227,11 +258,14 @@ public Set<String> getPushedPartitionTables() {

private CubeMetastoreClient metastore;
private final Map<AbstractCubeTable, String> partialJoinConditions;
private final Map<AbstractCubeTable, JoinType> tableJoinTypeMap;
private boolean partialJoinChain;
private AbstractCubeTable target;
private HiveConf conf;

public JoinResolver(Configuration conf) {
partialJoinConditions = new HashMap<AbstractCubeTable, String>();
tableJoinTypeMap = new HashMap<AbstractCubeTable, JoinType>();
}

private CubeMetastoreClient getMetastoreClient() throws HiveException {
Expand Down Expand Up @@ -359,7 +393,7 @@ public void autoResolveJoins(CubeQueryContext cubeql) throws HiveException {
}
}
}
AutoJoinContext joinCtx = new AutoJoinContext(joinChain, partialJoinConditions);
AutoJoinContext joinCtx = new AutoJoinContext(joinChain, partialJoinConditions, partialJoinChain, tableJoinTypeMap);
cubeql.setAutoJoinCtx(joinCtx);
cubeql.setJoinsResolvedAutomatically(joinsResolved);
}
Expand All @@ -381,7 +415,8 @@ private void searchDimensionTables(ASTNode node) throws HiveException {
if (node == null) {
return;
}

// User has specified join conditions partially. We need to store join conditions as well as join types
partialJoinChain = true;
if (isJoinToken(node)) {
ASTNode left = (ASTNode) node.getChild(0);
ASTNode right = (ASTNode) node.getChild(1);
Expand All @@ -397,7 +432,7 @@ private void searchDimensionTables(ASTNode node) throws HiveException {
joinCond = HQLParser.getString((ASTNode) node.getChild(2));
}
partialJoinConditions.put(dimensionTable, joinCond);

tableJoinTypeMap.put(dimensionTable, getJoinType(node));
if (isJoinToken(left)) {
searchDimensionTables(left);
} else {
Expand All @@ -411,6 +446,25 @@ private void searchDimensionTables(ASTNode node) throws HiveException {

}

private JoinType getJoinType(ASTNode node) {
switch(node.getToken().getType()) {
case TOK_LEFTOUTERJOIN:
return JoinType.LEFTOUTER;
case TOK_LEFTSEMIJOIN:
return JoinType.LEFTSEMI;
case TOK_RIGHTOUTERJOIN:
return JoinType.RIGHTOUTER;
case TOK_FULLOUTERJOIN:
return JoinType.FULLOUTER;
case TOK_JOIN:
return JoinType.INNER;
case TOK_UNIQUEJOIN:
return JoinType.UNIQUE;
default:
return JoinType.INNER;
}
}

// Recursively find out join conditions
private QBJoinTree genJoinTree(QB qb, ASTNode joinParseTree,
CubeQueryContext cubeql)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,28 +225,24 @@ public void testAutoJoinResolver() throws Exception {
@Test
public void testPartialJoinResolver() throws Exception {
String query = "SELECT citytable.name, testDim4.name, msr2 " +
"FROM testCube join citytable ON citytable.name = 'FOOBAR'" +
" join testDim4 on testDim4.name='TESTDIM4NAME'" +
"FROM testCube left outer join citytable ON citytable.name = 'FOOBAR'" +
" right outer join testDim4 on testDim4.name='TESTDIM4NAME'" +
" WHERE " + twoDaysRange;
CubeQueryContext rewrittenQuery = driver.rewrite(query);
String hql = rewrittenQuery.toHQL();
System.out.println("testPartialJoinResolver Partial join hql: " + hql);
String resolvedClause = rewrittenQuery.getAutoResolvedJoinChain();
System.out.println("@@resolved join chain " + resolvedClause);
Set<String> expectedClauses = new HashSet<String>();
expectedClauses.add(getDbName() + "c1_citytable citytable on testcube.cityid = citytable.id and ((( citytable . name ) = 'FOOBAR' )) and (citytable.dt = 'latest')");
expectedClauses.add(getDbName() + "c1_testdim4 testdim4 on testdim3.testdim4id = testdim4.id and ((( testdim4 . name ) = 'TESTDIM4NAME' )) and (testdim4.dt = 'latest')");
expectedClauses.add(getDbName() + "c1_testdim3 testdim3 on testdim2.testdim3id = testdim3.id and (testdim3.dt = 'latest')");
expectedClauses.add(getDbName() + "c1_testdim2 testdim2 on testcube.dim2 = testdim2.id and (testdim2.dt = 'latest')");

Set<String> actualClauses = new HashSet<String>();
for (String clause : StringUtils.splitByWholeSeparator(rewrittenQuery.getAutoResolvedJoinChain(), "join")) {
if (StringUtils.isNotBlank(clause)) {
actualClauses.add(clause.trim());
}
}
System.out.println("testPartialJoinResolverExpected" + expectedClauses);
System.out.println("testPartialJoinResolverActual" + actualClauses);
assertEquals(actualClauses, expectedClauses);
assertTrue(hql.contains(getDbName()+ "c1_testfact2_raw testcube" +
" left outer join " + getDbName() + "c1_citytable citytable " +
"on testcube.cityid = citytable.id and ((( citytable . name ) = 'FOOBAR' )) and (citytable.dt = 'latest')"));

assertTrue(hql.contains("right outer join " + getDbName() + "c1_testdim2 testdim2 " +
"on testcube.dim2 = testdim2.id " +
"right outer join " + getDbName() + "c1_testdim3 testdim3 " +
"on testdim2.testdim3id = testdim3.id and (testdim2.dt = 'latest') " +
"right outer join " + getDbName() + "c1_testdim4 testdim4 on testdim3.testdim4id = testdim4.id " +
"and ((( testdim4 . name ) = 'TESTDIM4NAME' )) and (testdim3.dt = 'latest')"));
}

@Test
Expand Down Expand Up @@ -301,7 +297,9 @@ public void testPreserveTableAlias() throws Exception {
System.out.println("testPreserveTableAlias@@HQL:" + hql);
System.out.println("testPreserveTableAlias@@Resolved join clause - " + ctx.getAutoResolvedJoinChain());
// Check that aliases are preserved in the join clause
assertEquals("left outer join "+ getDbName() + "c1_citytable c on t.cityid = c.id and (c.dt = 'latest')", ctx.getAutoResolvedJoinChain().trim());
// Conf will be ignored in this case since user has specified partial join
assertEquals("inner join "+ getDbName() + "c1_citytable c on t.cityid = c.id and (c.dt = 'latest')",
ctx.getAutoResolvedJoinChain().trim());
String whereClause = hql.substring(hql.indexOf("WHERE"));
// Check that the partition condition is not added again in where clause
assertFalse(whereClause.contains("c.dt = 'latest'"));
Expand Down

0 comments on commit 02a3bb8

Please sign in to comment.