merge with branch-2.1 from apache/hive
prongs committed Jul 26, 2016
2 parents 6a4f235 + 5015e20 commit b4f3754
Showing 1,979 changed files with 306,721 additions and 89,134 deletions.
4 changes: 2 additions & 2 deletions .reviewboardrc
Original file line number Diff line number Diff line change
@@ -28,6 +28,6 @@

REPOSITORY='hive-git'
REVIEWBOARD_URL='https://reviews.apache.org'
TRACKING_BRANCH='origin/master'
TRACKING_BRANCH='origin/branch-2.1'
TARGET_GROUPS='hive'
GUESS_FIELDS='true'
GUESS_FIELDS='yes'
2 changes: 1 addition & 1 deletion NOTICE
@@ -1,5 +1,5 @@
Apache Hive
Copyright 2008-2015 The Apache Software Foundation
Copyright 2008-2016 The Apache Software Foundation

This product includes software developed by The Apache Software
Foundation (http://www.apache.org/).
40 changes: 17 additions & 23 deletions README.txt
@@ -1,11 +1,13 @@
Apache Hive (TM) @VERSION@
======================

The Apache Hive (TM) data warehouse software facilitates querying and
managing large datasets residing in distributed storage. Built on top
of Apache Hadoop (TM), it provides:
The Apache Hive (TM) data warehouse software facilitates reading,
writing, and managing large datasets residing in distributed storage
using SQL. Built on top of Apache Hadoop (TM), it provides:

* Tools to enable easy data extract/transform/load (ETL)
* Tools to enable easy access to data via SQL, thus enabling data
warehousing tasks such as extract/transform/load (ETL), reporting,
and data analysis

* A mechanism to impose structure on a variety of data formats

@@ -15,17 +17,11 @@ of Apache Hadoop (TM), it provides:
* Query execution using Apache Hadoop MapReduce, Apache Tez
or Apache Spark frameworks.

Hive implements a dialect of SQL (Hive QL) that focuses on analytics
and presents a rich set of SQL semantics including OLAP functions,
subqueries, common table expressions and more. Hive allows SQL
developers or users with SQL tools to easily query, analyze and
process data stored in Hadoop.
Hive also allows programmers familiar with the MapReduce framework
to plug in their custom mappers and reducers to perform more
sophisticated analysis that may not be supported by the built-in
capabilities of the language. QL can also be extended with custom
scalar functions (UDF's), aggregations (UDAF's), and table
functions (UDTF's).
Hive provides standard SQL functionality, including many of the later
2003 and 2011 features for analytics. These include OLAP functions,
subqueries, common table expressions, and more. Hive's SQL can also be
extended with user code via user defined functions (UDFs), user defined
aggregates (UDAFs), and user defined table functions (UDTFs).
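A user defined function in Hive is, at its core, a class exposing a public evaluate method that Hive resolves by reflection. A minimal standalone sketch of that shape — a real Hive UDF would be a public class extending org.apache.hadoop.hive.ql.exec.UDF and would typically use Hadoop Writable types such as Text; the base class and wrapper types are omitted here so the sketch compiles on its own:

```java
// Sketch of the classic Hive UDF shape: a public evaluate() method that
// Hive resolves by reflection. In real Hive code this class would extend
// org.apache.hadoop.hive.ql.exec.UDF and use Writable types such as Text;
// plain String is used here so the sketch is self-contained.
class LowerUdf {
    public String evaluate(String s) {
        if (s == null) {
            return null; // Hive UDFs conventionally map NULL input to NULL output
        }
        return s.toLowerCase();
    }
}
```

Such a function would typically be registered in HiveQL with CREATE TEMPORARY FUNCTION and then called like any built-in.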

Hive users have a choice of 3 runtimes when executing SQL queries.
Users can choose between Apache Hadoop MapReduce, Apache Tez or
@@ -46,12 +42,10 @@ at any time. In each case, Hive is best suited for use cases
where the amount of data processed is large enough to require a
distributed system.

Hive is not designed for online transaction processing and does
not support row level insert/updates. It is best used for batch
jobs over large sets of immutable data (like web logs). What
Hive values most are scalability (scale out with more machines
added dynamically to the Hadoop cluster), extensibility (with
MapReduce framework and UDF/UDAF/UDTF), fault-tolerance, and
Hive is not designed for online transaction processing. It is best used
for traditional data warehousing tasks. Hive is designed to maximize
scalability (scale out with more machines added dynamically to the Hadoop
cluster), performance, extensibility, fault-tolerance, and
loose-coupling with its input formats.


@@ -79,9 +73,9 @@ Getting Started
Requirements
============

- Java 1.7
- Java 1.7 or 1.8

- Hadoop 1.x, 2.x
- Hadoop 1.x, 2.x (2.x required for Hive 2.x)


Upgrading from older versions of Hive
6,446 changes: 622 additions & 5,824 deletions RELEASE_NOTES.txt

Large diffs are not rendered by default.

@@ -149,7 +149,7 @@ public CompareOp getCompareOp(String udfType, IndexSearchCondition sc)
return clz.newInstance();
} catch (ClassCastException e) {
throw new SerDeException("Column type mismatch in WHERE clause "
+ sc.getComparisonExpr().getExprString() + " found type "
+ sc.getIndexExpr().getExprString() + " found type "
+ sc.getConstantDesc().getTypeString() + " instead of "
+ sc.getColumnDesc().getTypeString());
} catch (IllegalAccessException e) {
@@ -181,7 +181,7 @@ public PrimitiveComparison getPrimitiveComparison(String type, IndexSearchCondit
return clz.newInstance();
} catch (ClassCastException e) {
throw new SerDeException("Column type mismatch in WHERE clause "
+ sc.getComparisonExpr().getExprString() + " found type "
+ sc.getIndexExpr().getExprString() + " found type "
+ sc.getConstantDesc().getTypeString() + " instead of "
+ sc.getColumnDesc().getTypeString());
} catch (IllegalAccessException e) {
@@ -60,7 +60,7 @@ public PushdownTuple(IndexSearchCondition sc, PrimitiveComparison pCompare, Comp
} catch (ClassCastException cce) {
log.info(StringUtils.stringifyException(cce));
throw new SerDeException(" Column type mismatch in where clause "
+ sc.getComparisonExpr().getExprString() + " found type "
+ sc.getIndexExpr().getExprString() + " found type "
+ sc.getConstantDesc().getTypeString() + " instead of "
+ sc.getColumnDesc().getTypeString());
} catch (HiveException e) {
1 change: 1 addition & 0 deletions ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
@@ -933,6 +933,7 @@ public class GenVectorCode extends Task {
// Casts
{"ColumnUnaryFunc", "Cast", "long", "double", "", "", "(long)", "", ""},
{"ColumnUnaryFunc", "Cast", "double", "long", "", "", "(double)", "", ""},
{"ColumnUnaryFunc", "CastLongToFloatVia", "double", "long", "", "", "(float)", "", ""},
{"ColumnUnaryFunc", "CastDoubleToBooleanVia", "long", "double", "MathExpr.toBool", "",
"", "", ""},
{"ColumnUnaryFunc", "CastLongToBooleanVia", "long", "long", "MathExpr.toBool", "",
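The descriptor rows added above (such as the new CastLongToFloatVia entry) drive GenVectorCode's source generation: each row's fields are substituted into a vectorized-expression template to emit one generated class. A rough, self-contained sketch of that substitution idea — the template text and angle-bracket placeholder syntax here are illustrative assumptions, not Hive's actual templates:

```java
import java.util.Map;

// Illustrative template expansion in the style of GenVectorCode: each
// descriptor row maps placeholder names to values, which are substituted
// into a Java-source template to produce one generated class per row.
class TemplateExpander {
    static String expand(String template, Map<String, String> row) {
        String out = template;
        for (Map.Entry<String, String> e : row.entrySet()) {
            // Replace every <Placeholder> occurrence with the row's value.
            out = out.replace("<" + e.getKey() + ">", e.getValue());
        }
        return out;
    }
}
```

With a template like "class &lt;ClassName&gt; { double f(long v) { return &lt;Cast&gt; v; } }" and a row supplying ClassName and Cast, one expansion call yields a complete generated source string.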
24 changes: 22 additions & 2 deletions beeline/src/java/org/apache/hive/beeline/BeeLine.java
@@ -93,6 +93,9 @@
import org.apache.hadoop.io.IOUtils;
import org.apache.hive.beeline.cli.CliOptionsProcessor;

import org.apache.hive.jdbc.Utils;
import org.apache.hive.jdbc.Utils.JdbcConnectionParams;

/**
* A console SQL shell with command completion.
* <p>
@@ -139,7 +142,6 @@ public class BeeLine implements Closeable {
private static final Options options = new Options();

public static final String BEELINE_DEFAULT_JDBC_DRIVER = "org.apache.hive.jdbc.HiveDriver";
public static final String BEELINE_DEFAULT_JDBC_URL = "jdbc:hive2://";
public static final String DEFAULT_DATABASE_NAME = "default";

private static final String SCRIPT_OUTPUT_PREFIX = ">>>";
@@ -297,6 +299,12 @@ public class BeeLine implements Closeable {
.withDescription("the JDBC URL to connect to")
.create('u'));

// -r
options.addOption(OptionBuilder
.withLongOpt("reconnect")
.withDescription("Reconnect to last saved connect url (in conjunction with !save)")
.create('r'));

// -n <username>
options.addOption(OptionBuilder
.hasArg()
@@ -739,6 +747,10 @@ int initArgs(String[] args) {
pass = cl.getOptionValue("p");
}
url = cl.getOptionValue("u");
if ((url == null) && cl.hasOption("reconnect")){
// If url was not specified with -u, but -r was present, use that.
url = getOpts().getLastConnectedUrl();
}
getOpts().setInitFiles(cl.getOptionValues("i"));
getOpts().setScriptFile(cl.getOptionValue("f"));
if (cl.getOptionValues('e') != null) {
@@ -756,6 +768,14 @@
*/

if (url != null) {
if (user == null) {
user = Utils.parsePropertyFromUrl(url, JdbcConnectionParams.AUTH_USER);
}

if (pass == null) {
pass = Utils.parsePropertyFromUrl(url, JdbcConnectionParams.AUTH_PASSWD);
}

String com = constructCmd(url, user, pass, driver, false);
String comForDebug = constructCmd(url, user, pass, driver, true);
debug("issuing: " + comForDebug);
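The new initArgs logic above falls back to Utils.parsePropertyFromUrl to recover user and password from the JDBC URL when -n/-p were not given. As an illustration of that kind of lookup — assuming simple ;key=value parameters appended to a hive2-style URL; the real Hive JDBC URL grammar and the actual Utils implementation are richer:

```java
// Simplified lookup of a ;key=value parameter in a hive2-style JDBC URL,
// e.g. jdbc:hive2://host:10000/default;user=scott;password=tiger
// This illustrates the idea only; it is not Hive's actual Utils code.
class UrlParams {
    static String parseProperty(String url, String key) {
        for (String part : url.split(";")) {
            int eq = part.indexOf('=');
            if (eq > 0 && part.substring(0, eq).equals(key)) {
                return part.substring(eq + 1);
            }
        }
        return null; // property not present in the URL
    }
}
```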
@@ -884,7 +904,7 @@ int runInit() {
}

private int embeddedConnect() {
if (!execCommandWithPrefix("!connect " + BEELINE_DEFAULT_JDBC_URL + " '' ''")) {
if (!execCommandWithPrefix("!connect " + Utils.URL_PREFIX + " '' ''")) {
return ERRNO_OTHER;
} else {
return ERRNO_OK;
92 changes: 79 additions & 13 deletions beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java
@@ -28,6 +28,8 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Arrays;
@@ -36,6 +38,7 @@
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.TreeSet;

import jline.Terminal;
@@ -56,6 +59,8 @@ class BeeLineOpts implements Completer {
public static final String DEFAULT_NULL_STRING = "NULL";
public static final char DEFAULT_DELIMITER_FOR_DSV = '|';

public static String URL_ENV_PREFIX = "BEELINE_URL_";

private final BeeLine beeLine;
private boolean autosave = false;
private boolean silent = false;
@@ -102,6 +107,36 @@ class BeeLineOpts implements Completer {
private Map<String, String> hiveConfVariables = new HashMap<String, String>();
private boolean helpAsked;

private String lastConnectedUrl = null;

private TreeSet<String> cachedPropertyNameSet = null;

@Retention(RetentionPolicy.RUNTIME)
public @interface Ignore {
// marker annotations for functions that Reflector should ignore / pretend it does not exist

// NOTE: BeeLineOpts uses Reflector in an extensive way to call getters and setters on itself
// If you want to add any getters or setters to this class, but not have it interfere with
// saved variables in beeline.properties, careful use of this marker is needed.
// Also possible to get this by naming these functions obtainBlah instead of getBlah
// and so on, but that is not explicit and will likely surprise people looking at the
// code in the future. Better to be explicit in intent.
}

public interface Env {
// Env interface to mock out dealing with Environment variables
// This allows us to interface with Environment vars through
// BeeLineOpts while allowing tests to mock out Env setting if needed.
String get(String envVar);
}

public static Env env = new Env() {
@Override
public String get(String envVar) {
return System.getenv(envVar); // base env impl simply defers to System.getenv.
}
};
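The Env interface and its System.getenv-backed default above form a test seam: code reads environment variables through a swappable indirection so tests can inject fixed values without touching the process environment. A condensed, self-contained sketch of the same pattern:

```java
import java.util.Map;

// A swappable environment-variable reader, mirroring the BeeLineOpts.Env
// seam: the default implementation defers to System.getenv, and tests can
// replace it with a map-backed fake.
class EnvSeam {
    interface Env {
        String get(String name);
    }

    // Production default: defer to the real process environment.
    static Env env = System::getenv;

    // Map-backed fake for tests; absent keys return null, like getenv.
    static Env fromMap(Map<String, String> vars) {
        return vars::get;
    }
}
```

A test swaps the seam once (EnvSeam.env = EnvSeam.fromMap(...)), exercises the code under test, and never depends on the machine's real environment.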

public BeeLineOpts(BeeLine beeLine, Properties props) {
this.beeLine = beeLine;
if (terminal.getWidth() > 0) {
@@ -177,24 +212,35 @@ public void save(OutputStream out) throws IOException {

String[] propertyNames()
throws IllegalAccessException, InvocationTargetException {
TreeSet<String> names = new TreeSet<String>();
Set<String> names = propertyNamesSet(); // make sure we initialize if necessary
return names.toArray(new String[names.size()]);
}

// get all the values from getXXX methods
Method[] m = getClass().getDeclaredMethods();
for (int i = 0; m != null && i < m.length; i++) {
if (!(m[i].getName().startsWith("get"))) {
continue;
Set<String> propertyNamesSet()
throws IllegalAccessException, InvocationTargetException {
if (cachedPropertyNameSet == null){
TreeSet<String> names = new TreeSet<String>();

// get all the values from getXXX methods
Method[] m = getClass().getDeclaredMethods();
for (int i = 0; m != null && i < m.length; i++) {
if (!(m[i].getName().startsWith("get"))) {
continue;
}
if (m[i].getAnnotation(Ignore.class) != null){
continue; // not actually a getter
}
if (m[i].getParameterTypes().length != 0) {
continue;
}
String propName = m[i].getName().substring(3).toLowerCase();
names.add(propName);
}
if (m[i].getParameterTypes().length != 0) {
continue;
}
String propName = m[i].getName().substring(3).toLowerCase();
names.add(propName);
cachedPropertyNameSet = names;
}
return names.toArray(new String[names.size()]);
return cachedPropertyNameSet;
}
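The refactored propertyNamesSet above collects property names by reflecting over this class's zero-argument getXXX methods, skipping any getter carrying the @Ignore marker. The same pattern in condensed, self-contained form — the class and property names here are made up for illustration:

```java
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.reflect.Method;
import java.util.Set;
import java.util.TreeSet;

// Condensed version of the getter-scanning pattern: collect property names
// from zero-argument getXXX methods, skipping those marked to be ignored.
class PropertyScanner {
    @Retention(RetentionPolicy.RUNTIME)
    @interface Ignore { }

    static Set<String> propertyNames(Class<?> cls) {
        TreeSet<String> names = new TreeSet<>();
        for (Method m : cls.getDeclaredMethods()) {
            if (!m.getName().startsWith("get")) continue;
            if (m.getAnnotation(Ignore.class) != null) continue; // marked: not a property
            if (m.getParameterTypes().length != 0) continue;     // getters take no arguments
            names.add(m.getName().substring(3).toLowerCase());   // getColor -> "color"
        }
        return names;
    }
}

// Hypothetical target: "color" and "width" are properties; getSecret is ignored.
class Opts {
    public String getColor() { return "red"; }
    public int getWidth() { return 80; }
    @PropertyScanner.Ignore
    public String getSecret() { return "hidden"; }
}
```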


public Properties toProperties()
throws IllegalAccessException, InvocationTargetException,
ClassNotFoundException {
@@ -496,6 +542,7 @@ public int getMaxHeight() {
return maxHeight;
}

@Ignore
public File getPropertiesFile() {
return rcFile;
}
@@ -528,6 +575,7 @@ public void setNullEmptyString(boolean nullStringEmpty) {
this.nullEmptyString = nullStringEmpty;
}

@Ignore
public String getNullString(){
return nullEmptyString ? "" : DEFAULT_NULL_STRING;
}
@@ -567,5 +615,23 @@ public void setHelpAsked(boolean helpAsked) {
public boolean isHelpAsked() {
return helpAsked;
}

public String getLastConnectedUrl(){
return lastConnectedUrl;
}

public void setLastConnectedUrl(String lastConnectedUrl){
this.lastConnectedUrl = lastConnectedUrl;
}

@Ignore
public static Env getEnv(){
return env;
}

@Ignore
public static void setEnv(Env envToUse){
env = envToUse;
}
}

