Skip to content

Commit

Permalink
Update bigquery dependency and add support for BYTES datatype (#1045)
Browse files Browse the repository at this point in the history
* Update BigQuery dependency

* Add support for BYTES data type
  • Loading branch information
mziccard authored Jun 10, 2016
1 parent 3b35a9e commit 6539fbf
Show file tree
Hide file tree
Showing 12 changed files with 226 additions and 116 deletions.
2 changes: 1 addition & 1 deletion gcloud-java-bigquery/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
<dependency>
<groupId>com.google.apis</groupId>
<artifactId>google-api-services-bigquery</artifactId>
<version>v2-rev270-1.21.0</version>
<version>v2-rev303-1.22.0</version>
<scope>compile</scope>
<exclusions>
<exclusion>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public final class CsvOptions extends FormatOptions {
private final String encoding;
private final String fieldDelimiter;
private final String quote;
private final Integer skipLeadingRows;
private final Long skipLeadingRows;

public static final class Builder {

Expand All @@ -43,18 +43,27 @@ public static final class Builder {
private String encoding;
private String fieldDelimiter;
private String quote;
private Integer skipLeadingRows;
private Long skipLeadingRows;

private Builder() {}

/**
* Creates a builder whose options are copied, field by field, from the given
* {@code CsvOptions}.
*
* @param csvOptions the options to copy into this builder
*/
private Builder(CsvOptions csvOptions) {
this.allowJaggedRows = csvOptions.allowJaggedRows;
this.allowQuotedNewLines = csvOptions.allowQuotedNewLines;
this.encoding = csvOptions.encoding;
this.fieldDelimiter = csvOptions.fieldDelimiter;
this.quote = csvOptions.quote;
this.skipLeadingRows = csvOptions.skipLeadingRows;
}

/**
* Sets whether BigQuery should accept rows that are missing trailing optional columns. If
* {@code true}, BigQuery treats missing trailing columns as null values. If {@code false},
* records with missing trailing columns are treated as bad records, and if there are too many
* bad records, an invalid error is returned in the job result. By default, rows with missing
* trailing columns are considered bad records.
*/
public Builder allowJaggedRows(Boolean allowJaggedRows) {
public Builder allowJaggedRows(boolean allowJaggedRows) {
this.allowJaggedRows = allowJaggedRows;
return this;
}
Expand All @@ -63,7 +72,7 @@ public Builder allowJaggedRows(Boolean allowJaggedRows) {
* Sets whether BigQuery should allow quoted data sections that contain newline characters in a
* CSV file. By default, quoted newlines are not allowed.
*/
public Builder allowQuotedNewLines(Boolean allowQuotedNewLines) {
public Builder allowQuotedNewLines(boolean allowQuotedNewLines) {
this.allowQuotedNewLines = allowQuotedNewLines;
return this;
}
Expand Down Expand Up @@ -104,7 +113,7 @@ public Builder fieldDelimiter(String fieldDelimiter) {
* string to ISO-8859-1 encoding, and then uses the first byte of the encoded string to split
* the data in its raw, binary state. The default value is a double-quote ('"'). If your data
* does not contain quoted sections, set the property value to an empty string. If your data
* contains quoted newline characters, you must also set {@link #allowQuotedNewLines(Boolean)}
* contains quoted newline characters, you must also set {@link #allowQuotedNewLines(boolean)}
* property to {@code true}.
*/
public Builder quote(String quote) {
Expand All @@ -117,7 +126,7 @@ public Builder quote(String quote) {
* data. The default value is 0. This property is useful if you have header rows in the file
* that should be skipped.
*/
public Builder skipLeadingRows(Integer skipLeadingRows) {
public Builder skipLeadingRows(long skipLeadingRows) {
this.skipLeadingRows = skipLeadingRows;
return this;
}
Expand Down Expand Up @@ -186,21 +195,15 @@ public String quote() {
* Returns the number of rows at the top of a CSV file that BigQuery will skip when reading the
* data.
*/
public Integer skipLeadingRows() {
public Long skipLeadingRows() {
return skipLeadingRows;
}

/**
* Returns a builder for the {@code CsvOptions} object.
*/
public Builder toBuilder() {
return new Builder()
.allowJaggedRows(allowJaggedRows)
.allowQuotedNewLines(allowQuotedNewLines)
.encoding(encoding)
.fieldDelimiter(fieldDelimiter)
.quote(quote)
.skipLeadingRows(skipLeadingRows);
return new Builder(this);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ public static class Type implements Serializable {
private static final long serialVersionUID = 2841484762609576959L;

public enum Value {
STRING, INTEGER, FLOAT, BOOLEAN, TIMESTAMP, RECORD
BYTES, STRING, INTEGER, FLOAT, BOOLEAN, TIMESTAMP, RECORD
}

private final Value value;
Expand Down Expand Up @@ -108,6 +108,13 @@ public List<Field> fields() {
return fields;
}

/**
* Returns a new {@code Type} constructed from {@link Value#BYTES}.
*/
public static Type bytes() {
return new Type(Value.BYTES);
}

/**
* Returns a {@link Value#STRING} field value.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import com.google.common.base.Function;
import com.google.common.base.MoreObjects;
import com.google.common.collect.Lists;
import com.google.common.io.BaseEncoding;

import java.io.Serializable;
import java.util.List;
Expand Down Expand Up @@ -54,7 +55,7 @@ public FieldValue apply(Object pb) {
public enum Attribute {
/**
* A primitive field value. A {@code FieldValue} is primitive when the corresponding field has
* type {@link Field.Type#bool()}, {@link Field.Type#string()},
* type {@link Field.Type#bytes()}, {@link Field.Type#bool()}, {@link Field.Type#string()},
* {@link Field.Type#floatingPoint()}, {@link Field.Type#integer()},
* {@link Field.Type#timestamp()} or the value is set to {@code null}.
*/
Expand All @@ -80,7 +81,7 @@ public enum Attribute {
* Returns the attribute of this Field Value.
*
* @return {@link Attribute#PRIMITIVE} if the field is a primitive type
* ({@link Field.Type#bool()}, {@link Field.Type#string()},
* ({@link Field.Type#bytes()}, {@link Field.Type#bool()}, {@link Field.Type#string()},
* {@link Field.Type#floatingPoint()}, {@link Field.Type#integer()},
* {@link Field.Type#timestamp()}) or is {@code null}. Returns {@link Attribute#REPEATED} if
* the corresponding field has ({@link Field.Mode#REPEATED}) mode. Returns
Expand Down Expand Up @@ -108,8 +109,8 @@ public Object value() {

/**
* Returns this field's value as a {@link String}. This method should only be used if the
* corresponding field has primitive type ({@link Field.Type#bool()}, {@link Field.Type#string()},
* {@link Field.Type#floatingPoint()}, {@link Field.Type#integer()},
* corresponding field has primitive type ({@link Field.Type#bytes()}, {@link Field.Type#bool()},
* {@link Field.Type#string()}, {@link Field.Type#floatingPoint()}, {@link Field.Type#integer()},
* {@link Field.Type#timestamp()}).
*
* @throws ClassCastException if the field is not a primitive type
Expand All @@ -121,6 +122,22 @@ public String stringValue() {
return (String) value;
}

/**
* Returns this field's value as a byte array, obtained by base64-decoding the result of
* {@link #stringValue()}. This method should only be used if the corresponding field has
* type {@link Field.Type#bytes()}.
*
* @return the decoded bytes
* @throws ClassCastException if the field is not a primitive type
* @throws NullPointerException if {@link #isNull()} returns {@code true}
* @throws IllegalStateException if the field value is not encoded in base64
*/
public byte[] bytesValue() {
try {
return BaseEncoding.base64().decode(stringValue());
} catch (IllegalArgumentException ex) {
// A decoding failure is rethrown as IllegalStateException, keeping the original as cause.
throw new IllegalStateException(ex);
}
}

/**
* Returns this field's value as a {@code long}. This method should only be used if the
* corresponding field has {@link Field.Type#integer()} type.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ public final class InsertAllRequest implements Serializable {

/**
* A Google BigQuery row to be inserted into a table. Each {@code RowToInsert} has an associated
* id used by BigQuery to detect duplicate insertion requests on a best-effort basis.
* id used by BigQuery to detect duplicate insertion requests on a best-effort basis. Note
* that data for fields of type {@link Field.Type#bytes()} must be provided as a
* base64-encoded string.
*
* <p>Example usage of creating a row to insert:
* <pre> {@code
Expand All @@ -58,8 +60,9 @@ public final class InsertAllRequest implements Serializable {
* recordContent.put("subfieldName1", "value");
* recordContent.put("subfieldName2", repeatedFieldValue);
* Map<String, Object> rowContent = new HashMap<String, Object>();
* rowContent.put("fieldName1", true);
* rowContent.put("fieldName2", recordContent);
* rowContent.put("booleanFieldName", true);
* rowContent.put("bytesFieldName", "DQ4KDQ==");
* rowContent.put("recordFieldName", recordContent);
* RowToInsert row = new RowToInsert("rowId", rowContent);
* }</pre>
*
Expand Down Expand Up @@ -116,7 +119,8 @@ public boolean equals(Object obj) {
}

/**
* Creates a row to be inserted with associated id.
* Creates a row to be inserted with an associated id. Note that data for fields of type
* {@link Field.Type#bytes()} must be provided as a base64-encoded string.
*
* @param id id of the row, used to identify duplicates
* @param content the actual content of the row
Expand All @@ -126,7 +130,8 @@ public static RowToInsert of(String id, Map<String, Object> content) {
}

/**
* Creates a row to be inserted without associated id.
* Creates a row to be inserted without an associated id. Note that data for fields of
* type {@link Field.Type#bytes()} must be provided as a base64-encoded string.
*
* @param content the actual content of the row
*/
Expand Down Expand Up @@ -174,7 +179,8 @@ public Builder addRow(RowToInsert rowToInsert) {
}

/**
* Adds a row to be inserted with associated id.
* Adds a row to be inserted with an associated id. Note that data for fields of type
* {@link Field.Type#bytes()} must be provided as a base64-encoded string.
*
* <p>Example usage of adding a row with associated id:
* <pre> {@code
Expand All @@ -184,8 +190,9 @@ public Builder addRow(RowToInsert rowToInsert) {
* recordContent.put("subfieldName1", "value");
* recordContent.put("subfieldName2", repeatedFieldValue);
* Map<String, Object> rowContent = new HashMap<String, Object>();
* rowContent.put("fieldName1", true);
* rowContent.put("fieldName2", recordContent);
* rowContent.put("booleanFieldName", true);
* rowContent.put("bytesFieldName", "DQ4KDQ==");
* rowContent.put("recordFieldName", recordContent);
* builder.addRow("rowId", rowContent);
* }</pre>
*/
Expand All @@ -195,7 +202,8 @@ public Builder addRow(String id, Map<String, Object> content) {
}

/**
* Adds a row to be inserted without an associated id.
* Adds a row to be inserted without an associated id. Note that data for fields of
* type {@link Field.Type#bytes()} must be provided as a base64-encoded string.
*
* <p>Example usage of adding a row without an associated id:
* <pre> {@code
Expand All @@ -205,8 +213,9 @@ public Builder addRow(String id, Map<String, Object> content) {
* recordContent.put("subfieldName1", "value");
* recordContent.put("subfieldName2", repeatedFieldValue);
* Map<String, Object> rowContent = new HashMap<String, Object>();
* rowContent.put("fieldName1", true);
* rowContent.put("fieldName2", recordContent);
* rowContent.put("booleanFieldName", true);
* rowContent.put("bytesFieldName", "DQ4KDQ==");
* rowContent.put("recordFieldName", recordContent);
* builder.addRow(rowContent);
* }</pre>
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import com.google.api.services.bigquery.model.JobConfigurationLoad;
import com.google.common.base.MoreObjects.ToStringHelper;
import com.google.common.collect.ImmutableList;
import com.google.common.primitives.Ints;

import java.util.List;
import java.util.Objects;
Expand Down Expand Up @@ -97,12 +98,18 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur
|| loadConfigurationPb.getQuote() != null
|| loadConfigurationPb.getSkipLeadingRows() != null) {
CsvOptions.Builder builder = CsvOptions.builder()
.allowJaggedRows(loadConfigurationPb.getAllowJaggedRows())
.allowQuotedNewLines(loadConfigurationPb.getAllowQuotedNewlines())
.encoding(loadConfigurationPb.getEncoding())
.fieldDelimiter(loadConfigurationPb.getFieldDelimiter())
.quote(loadConfigurationPb.getQuote())
.skipLeadingRows(loadConfigurationPb.getSkipLeadingRows());
.quote(loadConfigurationPb.getQuote());
if (loadConfigurationPb.getAllowJaggedRows() != null) {
builder.allowJaggedRows(loadConfigurationPb.getAllowJaggedRows());
}
if (loadConfigurationPb.getAllowQuotedNewlines() != null) {
builder.allowQuotedNewLines(loadConfigurationPb.getAllowQuotedNewlines());
}
if (loadConfigurationPb.getSkipLeadingRows() != null) {
builder.skipLeadingRows(loadConfigurationPb.getSkipLeadingRows());
}
this.formatOptions = builder.build();
}
this.maxBadRecords = loadConfigurationPb.getMaxBadRecords();
Expand Down Expand Up @@ -300,8 +307,11 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
.setAllowJaggedRows(csvOptions.allowJaggedRows())
.setAllowQuotedNewlines(csvOptions.allowQuotedNewLines())
.setEncoding(csvOptions.encoding())
.setQuote(csvOptions.quote())
.setSkipLeadingRows(csvOptions.skipLeadingRows());
.setQuote(csvOptions.quote());
if (csvOptions.skipLeadingRows() != null) {
// todo(mziccard) remove checked cast or comment when #1044 is closed
loadConfigurationPb.setSkipLeadingRows(Ints.checkedCast(csvOptions.skipLeadingRows()));
}
}
if (schema != null) {
loadConfigurationPb.setSchema(schema.toPb());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import com.google.cloud.bigquery.JobInfo.WriteDisposition;
import com.google.common.base.MoreObjects;
import com.google.common.collect.ImmutableList;
import com.google.common.primitives.Ints;

import java.io.Serializable;
import java.util.List;
Expand Down Expand Up @@ -90,12 +91,18 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur
|| loadConfigurationPb.getQuote() != null
|| loadConfigurationPb.getSkipLeadingRows() != null) {
CsvOptions.Builder builder = CsvOptions.builder()
.allowJaggedRows(loadConfigurationPb.getAllowJaggedRows())
.allowQuotedNewLines(loadConfigurationPb.getAllowQuotedNewlines())
.encoding(loadConfigurationPb.getEncoding())
.fieldDelimiter(loadConfigurationPb.getFieldDelimiter())
.quote(loadConfigurationPb.getQuote())
.skipLeadingRows(loadConfigurationPb.getSkipLeadingRows());
.quote(loadConfigurationPb.getQuote());
if (loadConfigurationPb.getAllowJaggedRows() != null) {
builder.allowJaggedRows(loadConfigurationPb.getAllowJaggedRows());
}
if (loadConfigurationPb.getAllowQuotedNewlines() != null) {
builder.allowQuotedNewLines(loadConfigurationPb.getAllowQuotedNewlines());
}
if (loadConfigurationPb.getSkipLeadingRows() != null) {
builder.skipLeadingRows(loadConfigurationPb.getSkipLeadingRows());
}
this.formatOptions = builder.build();
}
this.maxBadRecords = loadConfigurationPb.getMaxBadRecords();
Expand Down Expand Up @@ -271,8 +278,11 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
.setAllowJaggedRows(csvOptions.allowJaggedRows())
.setAllowQuotedNewlines(csvOptions.allowQuotedNewLines())
.setEncoding(csvOptions.encoding())
.setQuote(csvOptions.quote())
.setSkipLeadingRows(csvOptions.skipLeadingRows());
.setQuote(csvOptions.quote());
if (csvOptions.skipLeadingRows() != null) {
// todo(mziccard) remove checked cast or comment when #1044 is closed
loadConfigurationPb.setSkipLeadingRows(Ints.checkedCast(csvOptions.skipLeadingRows()));
}
}
if (schema != null) {
loadConfigurationPb.setSchema(schema.toPb());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public class CsvOptionsTest {
private static final Charset ENCODING = StandardCharsets.UTF_8;
private static final String FIELD_DELIMITER = ",";
private static final String QUOTE = "\"";
private static final Integer SKIP_LEADING_ROWS = 42;
private static final long SKIP_LEADING_ROWS = 42L;
private static final CsvOptions CSV_OPTIONS = CsvOptions.builder()
.allowJaggedRows(ALLOW_JAGGED_ROWS)
.allowQuotedNewLines(ALLOW_QUOTED_NEWLINE)
Expand Down Expand Up @@ -65,7 +65,7 @@ public void testBuilder() {
assertEquals(ENCODING.name(), CSV_OPTIONS.encoding());
assertEquals(FIELD_DELIMITER, CSV_OPTIONS.fieldDelimiter());
assertEquals(QUOTE, CSV_OPTIONS.quote());
assertEquals(SKIP_LEADING_ROWS, CSV_OPTIONS.skipLeadingRows());
assertEquals(SKIP_LEADING_ROWS, (long) CSV_OPTIONS.skipLeadingRows());
}

@Test
Expand Down
Loading

0 comments on commit 6539fbf

Please sign in to comment.