Skip to content

Commit

Permalink
Merge pull request #1408 from ashitsalesforce/master
Browse files Browse the repository at this point in the history
config property to output csv files in a specific charset
  • Loading branch information
ashitsalesforce authored Dec 13, 2024
2 parents 1aa652d + a0526ca commit d850881
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 11 deletions.
31 changes: 22 additions & 9 deletions src/main/java/com/salesforce/dataloader/config/AppConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,8 @@ public class AppConfig {
public static final String PROP_READ_UTF8 = "dataAccess.readUTF8"; //$NON-NLS-1$
public static final String PROP_WRITE_UTF8 = "dataAccess.writeUTF8"; //$NON-NLS-1$
// Charset to use for import (CSV read) operations; overrides the system default charset.
public static final String PROP_READ_CHARSET = "dataAccess.readCharset";

// Charset to use for export (CSV write) operations; overrides the system default charset.
public static final String PROP_WRITE_CHARSET = "dataAccess.writeCharset";

public static final String PROP_API_VERSION="salesforce.api.version";
public static final String PROP_OAUTH_INSTANCE_URL="salesforce.oauth.instanceURL";
public static final String PROP_USE_LEGACY_HTTP_GET="sfdc.useLegacyHttpGet";
Expand Down Expand Up @@ -778,6 +779,7 @@ private void setDefaults(Map<String, String> cliOptionsMap) {
setDefaultValue(PROP_RICH_TEXT_FIELD_REGEX, DEFAULT_RICHTEXT_REGEX);
setDefaultValue(PROP_DAO_SKIP_TOTAL_COUNT, true);
setDefaultValue(PROP_READ_CHARSET ,getDefaultCharsetForCsvReadWrite());
setDefaultValue(PROP_WRITE_CHARSET ,getDefaultCharsetForCsvReadWrite());
setDefaultValue(PROP_GMT_FOR_DATE_FIELD_VALUE, false);
setDefaultValue(PROP_SAVE_ALL_PROPS, false);
setDefaultValue(PROP_EXTRACT_ALL_CAPS_HEADERS, false);
Expand Down Expand Up @@ -1735,24 +1737,35 @@ public String getCsvEncoding(boolean isWrite) {
} else {
logger.debug("Getting charset for reading from CSV");
}
String charset = getDefaultCharsetForCsvReadWrite();
if (getBoolean(configProperty)) {
logger.debug("Using UTF8 charset because '"
+ configProperty
+"' is set to true");
return StandardCharsets.UTF_8.name();
}
if (!isWrite) {
String charset = getString(PROP_READ_CHARSET);
if (charset != null && !charset.isEmpty()) {
return charset;
charset = StandardCharsets.UTF_8.name();
} else {
if (isWrite) {
charset = getString(PROP_WRITE_CHARSET);
} else {
charset = getString(PROP_READ_CHARSET);
}
boolean validCharset = false;
for (String charsetName : Charset.availableCharsets().keySet()) {
if (charset.equalsIgnoreCase(charsetName)) {
validCharset = true;
break;
}
}
if (!validCharset) {
logger.warn("configured charset" + charset + " is not supported");
charset = getDefaultCharsetForCsvReadWrite();
}
}
String charset = getDefaultCharsetForCsvReadWrite();
logger.debug("Using charset " + charset);
return charset;
}

private static String defaultCharsetForCsvReadWrite = Charset.defaultCharset().name();
private static String defaultCharsetForCsvReadWrite = null;
private synchronized static String getDefaultCharsetForCsvReadWrite() {
if (defaultCharsetForCsvReadWrite != null) {
return defaultCharsetForCsvReadWrite;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ public void open() throws DataAccessObjectInitializationException {
private byte[] getBOM() {
if (StandardCharsets.UTF_8.equals(Charset.forName(this.encoding))) {
return new byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};
} else if (StandardCharsets.UTF_16.equals(Charset.forName(this.encoding))) {
} else if (this.encoding.startsWith(StandardCharsets.UTF_16.name())) {
return new byte[]{(byte) 0xFE, (byte) 0xFF};
}
return new byte[0];
Expand Down
1 change: 1 addition & 0 deletions src/main/resources/labels.properties
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,7 @@ AppConfig.property.description.sfdc.extraction.allCapsHeaders=set it to "true" t
AppConfig.property.description.sfdc.extraction.outputByteOrderMark=set to "true" by default. When set to "true", it writes Byte Order Mark (BOM) character if the CSV file is created in UTF-8 format.
AppConfig.property.description.config.properties.readonly=Do not modify config.properties file even if the user makes changes through Settings dialog.
AppConfig.property.description.dataAccess.readCharset=Override system default charset by specifying charset to use for import operations. Set it to UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, or UTF-32LE to handle import CSVs with Byte Order Mark (BOM) character.
AppConfig.property.description.dataAccess.writeCharset=Override system default charset by specifying charset to use for export operations. Set it to UTF-8 or UTF-16 to write export CSVs with Byte Order Mark (BOM) character.
AppConfig.property.description.loader.cacheSObjectNamesAndField=Cache object names and fields metadata across multiple operations. Applicable in the UI mode because batch mode executes one operation and stops.
AppConfig.property.description.sfdc.timezone=Details documented at https://developer.salesforce.com/docs/atlas.en-us.dataLoader.meta/dataLoader/configuring_the_data_loader.htm
AppConfig.property.description.process.outputSuccess=Details documented at https://developer.salesforce.com/docs/atlas.en-us.dataLoader.meta/dataLoader/loader_params.htm
Expand Down
39 changes: 38 additions & 1 deletion src/test/java/com/salesforce/dataloader/dao/CsvTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.util.List;

import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

import com.salesforce.dataloader.ConfigTestBase;
Expand Down Expand Up @@ -130,6 +131,41 @@ public void testCSVWriteBasic() throws Exception {
doTestCSVWriteBasic(AppUtil.COMMA);
}

/**
 * Sets both the read and the write charset properties to UTF-8 and then runs
 * {@code doTestCSVWriteBasic} with the comma delimiter.
 */
@Test
public void testCSVWriteUTF8BOMBasic() throws Exception {
    final String charsetName = "UTF-8";
    getController().getAppConfig().setValue(AppConfig.PROP_READ_CHARSET, charsetName);
    getController().getAppConfig().setValue(AppConfig.PROP_WRITE_CHARSET, charsetName);
    doTestCSVWriteBasic(AppUtil.COMMA);
}

/**
 * Sets both the read and the write charset properties to UTF-16LE and then runs
 * {@code doTestCSVWriteBasic} with the comma delimiter.
 */
@Test
public void testCSVWriteUTF16LEBOMBasic() throws Exception {
    final String charsetName = "UTF-16LE";
    getController().getAppConfig().setValue(AppConfig.PROP_READ_CHARSET, charsetName);
    getController().getAppConfig().setValue(AppConfig.PROP_WRITE_CHARSET, charsetName);
    doTestCSVWriteBasic(AppUtil.COMMA);
}

/**
 * Sets both the read and the write charset properties to UTF-16BE and then runs
 * {@code doTestCSVWriteBasic} with the comma delimiter.
 */
@Test
public void testCSVWriteUTF16BEBOMBasic() throws Exception {
    final String charsetName = "UTF-16BE";
    getController().getAppConfig().setValue(AppConfig.PROP_READ_CHARSET, charsetName);
    getController().getAppConfig().setValue(AppConfig.PROP_WRITE_CHARSET, charsetName);
    doTestCSVWriteBasic(AppUtil.COMMA);
}

/**
 * Sets both the read and the write charset properties to UTF-32LE and then runs
 * {@code doTestCSVWriteBasic} with the comma delimiter.
 */
@Test
public void testCSVWriteUTF32LEBOMBasic() throws Exception {
    final String charsetName = "UTF-32LE";
    getController().getAppConfig().setValue(AppConfig.PROP_READ_CHARSET, charsetName);
    getController().getAppConfig().setValue(AppConfig.PROP_WRITE_CHARSET, charsetName);
    doTestCSVWriteBasic(AppUtil.COMMA);
}

/**
 * Sets both the read and the write charset properties to UTF-32BE and then runs
 * {@code doTestCSVWriteBasic} with the comma delimiter.
 */
@Test
public void testCSVWriteUTF32BEBOMBasic() throws Exception {
    final String charsetName = "UTF-32BE";
    getController().getAppConfig().setValue(AppConfig.PROP_READ_CHARSET, charsetName);
    getController().getAppConfig().setValue(AppConfig.PROP_WRITE_CHARSET, charsetName);
    doTestCSVWriteBasic(AppUtil.COMMA);
}

@Test
public void testCSVWriteBasicWithDashDelimiter() throws Exception {
doTestCSVWriteBasic("-");
Expand All @@ -145,8 +181,9 @@ public void testCSVWriteBasicWithTabDelimiter() throws Exception {
doTestCSVWriteBasic(AppUtil.TAB);
}

private String writeCSVFilename = getTestDataDir() + "/csvtestTemp.csv";
private void doTestCSVWriteBasic(String delimiter) throws Exception {
File f = new File(getTestDataDir(), "csvtestTemp.csv");
File f = new File(writeCSVFilename);
String path = f.getAbsolutePath();
CSVFileWriter writer = new CSVFileWriter(path, getController().getAppConfig(), delimiter);
List<RowInterface> rowList = new ArrayList<RowInterface>();
Expand Down

0 comments on commit d850881

Please sign in to comment.