Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closes #1574 - Rename SegmentedArray to SegmentedString #1576

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
2 changes: 1 addition & 1 deletion arkouda/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1421,7 +1421,7 @@ def group(self) -> pdarray:

Notes
-----
If the arkouda server is compiled with "-sSegmentedArray.useHash=true",
If the arkouda server is compiled with "-sSegmentedString.useHash=true",
then arkouda uses 128-bit hash values to group strings, rather than sorting
the strings directly. This method is fast, but the resulting permutation
merely groups equivalent strings and does not sort them. If the "useHash"
Expand Down
2 changes: 1 addition & 1 deletion src/ArgSortMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ module ArgSortMsg
use ServerErrorStrings;

use RadixSortLSD;
use SegmentedArray;
use SegmentedString;
use Reflection;
use ServerErrors;
use Logging;
Expand Down
2 changes: 1 addition & 1 deletion src/ArraySetopsMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ module ArraySetopsMsg

use MultiTypeSymbolTable;
use MultiTypeSymEntry;
use SegmentedArray;
use SegmentedString;
use ServerErrorStrings;

use ArraySetops;
Expand Down
2 changes: 1 addition & 1 deletion src/Cast.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ module Cast {
use MultiTypeSymbolTable;
use MultiTypeSymEntry;
use Reflection;
use SegmentedArray;
use SegmentedString;
use ServerErrors;
use Logging;
use CommAggregation;
Expand Down
2 changes: 1 addition & 1 deletion src/CastMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ module CastMsg {
use MultiTypeSymbolTable;
use MultiTypeSymEntry;
use Reflection;
use SegmentedArray;
use SegmentedString;
use ServerErrors;
use Logging;
use Message;
Expand Down
2 changes: 1 addition & 1 deletion src/ConcatenateMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ module ConcatenateMsg

use MultiTypeSymbolTable;
use MultiTypeSymEntry;
use SegmentedArray;
use SegmentedString;
use ServerErrorStrings;
use CommAggregation;
use PrivateDist;
Expand Down
2 changes: 1 addition & 1 deletion src/FindSegmentsMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ module FindSegmentsMsg
use MultiTypeSymbolTable;
use MultiTypeSymEntry;
use ServerErrorStrings;
use SegmentedArray;
use SegmentedString;

use PrivateDist;
use CommAggregation;
Expand Down
3 changes: 1 addition & 2 deletions src/Flatten.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@ module Flatten {
use ServerConfig;

use AryUtil;
use SegmentedArray;
use SegmentedString;
use ServerErrors;
use SymArrayDmap;
use CommAggregation;
use Reflection;
use ArkoudaRegexCompat;
use CTypes;
use SegmentedArray only checkCompile, _unsafeCompileRegex;
stress-tess marked this conversation as resolved.
Show resolved Hide resolved

config const NULL_STRINGS_VALUE = 0:uint(8);

Expand Down
2 changes: 1 addition & 1 deletion src/FlattenMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ module FlattenMsg {
use Reflection;
use Flatten;
use ServerConfig;
use SegmentedArray;
use SegmentedString;
use Logging;
use Message;

Expand Down
2 changes: 1 addition & 1 deletion src/GenSymIO.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ module GenSymIO {
use Logging;
use Message;
use ServerConfig;
use SegmentedArray;
use SegmentedString;

private config const logLevel = ServerConfig.logLevel;
const gsLogger = new Logger(logLevel);
Expand Down
54 changes: 27 additions & 27 deletions src/HDF5Msg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ module HDF5Msg {
use ServerConfig;
use ServerErrors;
use ServerErrorStrings;
use SegmentedArray;
use SegmentedString;
use Sort;

require "c_helpers/help_h5ls.h", "c_helpers/help_h5ls.c";
Expand All @@ -36,8 +36,8 @@ module HDF5Msg {
const ARKOUDA_HDF5_FILE_VERSION_KEY = "file_version";
const ARKOUDA_HDF5_FILE_VERSION_VAL = 1.0:real(32);
type ARKOUDA_HDF5_FILE_VERSION_TYPE = real(32);
config const SEGARRAY_OFFSET_NAME = "segments";
config const SEGARRAY_VALUE_NAME = "values";
config const SEGSTRING_OFFSET_NAME = "segments";
config const SEGSTRING_VALUE_NAME = "values";
config const NULL_STRINGS_VALUE = 0:uint(8);
config const TRUNCATE: int = 0;
config const APPEND: int = 1;
Expand Down Expand Up @@ -279,38 +279,38 @@ module HDF5Msg {
var dataclass: C_HDF5.H5T_class_t;
var bytesize: int;
var isSigned: bool;
var isSegArray: bool;
var isSegString: bool;

try {
defer { // Close the file on exit
C_HDF5.H5Fclose(file_id);
}
if isStringsDataset(file_id, dsetName) {
if ( !skipSegStringOffsets ) {
var offsetDset = dsetName + "/" + SEGARRAY_OFFSET_NAME;
var offsetDset = dsetName + "/" + SEGSTRING_OFFSET_NAME;
var (offsetClass, offsetByteSize, offsetSign) =
try get_dataset_info(file_id, offsetDset);
if (offsetClass != C_HDF5.H5T_INTEGER) {
throw getErrorWithContext(
msg="dataset %s has incorrect one or more sub-datasets" +
" %s %s".format(dsetName,SEGARRAY_OFFSET_NAME,SEGARRAY_VALUE_NAME),
" %s %s".format(dsetName,SEGSTRING_OFFSET_NAME,SEGSTRING_VALUE_NAME),
lineNumber=getLineNumber(),
routineName=getRoutineName(),
moduleName=getModuleName(),
errorClass='SegArrayError');
errorClass='SegStringError');
}
}
var valueDset = dsetName + "/" + SEGARRAY_VALUE_NAME;
var valueDset = dsetName + "/" + SEGSTRING_VALUE_NAME;
try (dataclass, bytesize, isSigned) =
try get_dataset_info(file_id, valueDset);
isSegArray = true;
isSegString = true;
} else if isBooleanDataset(file_id, dsetName) {
var booleanDset = dsetName + "/" + "booleans";
(dataclass, bytesize, isSigned) = get_dataset_info(file_id, booleanDset);
isSegArray = false;
isSegString = false;
} else {
(dataclass, bytesize, isSigned) = get_dataset_info(file_id, dsetName);
isSegArray = false;
isSegString = false;
}
} catch e : Error {
//:TODO: recommend revisiting this catch block
Expand All @@ -321,7 +321,7 @@ module HDF5Msg {
moduleName=getModuleName(),
errorClass='Error');
}
return (isSegArray, dataclass, bytesize, isSigned);
return (isSegString, dataclass, bytesize, isSigned);
}


Expand Down Expand Up @@ -1865,7 +1865,7 @@ module HDF5Msg {
} else {
filenames = filelist;
}
var segArrayFlags: [filedom] bool;
var segStringFlags: [filedom] bool;
var dclasses: [filedom] C_HDF5.hid_t;
var bytesizes: [filedom] int;
var signFlags: [filedom] bool;
Expand All @@ -1881,7 +1881,7 @@ module HDF5Msg {
for (i, fname) in zip(filedom, filenames) {
var hadError = false;
try {
(segArrayFlags[i], dclasses[i], bytesizes[i], signFlags[i]) = get_dtype(fname, dsetName, calcStringOffsets);
(segStringFlags[i], dclasses[i], bytesizes[i], signFlags[i]) = get_dtype(fname, dsetName, calcStringOffsets);
} catch e: FileNotFoundError {
fileErrorMsg = "File %s not found".format(fname);
h5Logger.error(getModuleName(),getRoutineName(),getLineNumber(),fileErrorMsg);
Expand All @@ -1902,8 +1902,8 @@ module HDF5Msg {
h5Logger.error(getModuleName(),getRoutineName(),getLineNumber(),fileErrorMsg);
hadError = true;
if !allowErrors { return new MsgTuple(fileErrorMsg, MsgType.ERROR); }
} catch e: SegArrayError {
fileErrorMsg = "SegmentedArray error: %s".format(e.message());
} catch e: SegStringError {
fileErrorMsg = "SegmentedString error: %s".format(e.message());
h5Logger.error(getModuleName(),getRoutineName(),getLineNumber(),fileErrorMsg);
hadError = true;
if !allowErrors { return new MsgTuple(fileErrorMsg, MsgType.ERROR); }
Expand All @@ -1922,12 +1922,12 @@ module HDF5Msg {
fileErrorCount += 1;
}
}
const isSegArray = segArrayFlags[filedom.first];
const isSegString = segStringFlags[filedom.first];
const dataclass = dclasses[filedom.first];
const bytesize = bytesizes[filedom.first];
const isSigned = signFlags[filedom.first];
for (name, sa, dc, bs, sf) in zip(filenames, segArrayFlags, dclasses, bytesizes, signFlags) {
if ((sa != isSegArray) || (dc != dataclass)) {
for (name, sa, dc, bs, sf) in zip(filenames, segStringFlags, dclasses, bytesizes, signFlags) {
if ((sa != isSegString) || (dc != dataclass)) {
var errorMsg = "Inconsistent dtype in dataset %s of file %s".format(dsetName, name);
h5Logger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg);
return new MsgTuple(errorMsg, MsgType.ERROR);
Expand All @@ -1946,11 +1946,11 @@ module HDF5Msg {
var len: int;
var nSeg: int;
try {
if isSegArray {
if isSegString {
if (!calcStringOffsets) {
(segSubdoms, nSeg, skips) = get_subdoms(filenames, dsetName + "/" + SEGARRAY_OFFSET_NAME);
(segSubdoms, nSeg, skips) = get_subdoms(filenames, dsetName + "/" + SEGSTRING_OFFSET_NAME);
}
(subdoms, len, skips) = get_subdoms(filenames, dsetName + "/" + SEGARRAY_VALUE_NAME);
(subdoms, len, skips) = get_subdoms(filenames, dsetName + "/" + SEGSTRING_VALUE_NAME);
} else {
(subdoms, len, skips) = get_subdoms(filenames, dsetName);
}
Expand All @@ -1967,18 +1967,18 @@ module HDF5Msg {
h5Logger.debug(getModuleName(),getRoutineName(),getLineNumber(),
"Got subdomains and total length for dataset %s".format(dsetName));

select (isSegArray, dataclass) {
select (isSegString, dataclass) {
when (true, C_HDF5.H5T_INTEGER) {
if (bytesize != 1) || isSigned {
var errorMsg = "Error: detected unhandled datatype: segmented? %t, class %i, size %i, signed? %t".format(
isSegArray, dataclass, bytesize, isSigned);
isSegString, dataclass, bytesize, isSigned);
h5Logger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg);
return new MsgTuple(errorMsg, MsgType.ERROR);
}

// Load the strings bytes/values first
var entryVal = new shared SymEntry(len, uint(8));
read_files_into_distributed_array(entryVal.a, subdoms, filenames, dsetName + "/" + SEGARRAY_VALUE_NAME, skips);
read_files_into_distributed_array(entryVal.a, subdoms, filenames, dsetName + "/" + SEGSTRING_VALUE_NAME, skips);

proc _buildEntryCalcOffsets(): shared SymEntry throws {
var offsetsArray = segmentedCalcOffsets(entryVal.a, entryVal.aD);
Expand All @@ -1987,7 +1987,7 @@ module HDF5Msg {

proc _buildEntryLoadOffsets() throws {
var offsetsEntry = new shared SymEntry(nSeg, int);
read_files_into_distributed_array(offsetsEntry.a, segSubdoms, filenames, dsetName + "/" + SEGARRAY_OFFSET_NAME, skips);
read_files_into_distributed_array(offsetsEntry.a, segSubdoms, filenames, dsetName + "/" + SEGSTRING_OFFSET_NAME, skips);
fixupSegBoundaries(offsetsEntry.a, segSubdoms, subdoms);
return offsetsEntry;
}
Expand Down Expand Up @@ -2059,7 +2059,7 @@ module HDF5Msg {
}
otherwise {
var errorMsg = "detected unhandled datatype: segmented? %t, class %i, size %i, " +
"signed? %t".format(isSegArray, dataclass, bytesize, isSigned);
"signed? %t".format(isSegString, dataclass, bytesize, isSigned);
h5Logger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg);
return new MsgTuple(errorMsg, MsgType.ERROR);
}
Expand Down
2 changes: 1 addition & 1 deletion src/KExtremeMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ module KExtremeMsg

use MultiTypeSymbolTable;
use MultiTypeSymEntry;
use SegmentedArray;
use SegmentedString;
use ServerErrorStrings;

use KReduce;
Expand Down
2 changes: 1 addition & 1 deletion src/Merge.chpl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module Merge {
use IO;
use SegmentedArray;
use SegmentedString;
use RadixSortLSD only numTasks, calcBlock;
use Reflection;
use ServerConfig;
Expand Down
2 changes: 1 addition & 1 deletion src/ParquetMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ module ParquetMsg {
use Sort;
use CommAggregation;

use SegmentedArray;
use SegmentedString;


// Use reflection for error information
Expand Down
2 changes: 1 addition & 1 deletion src/RandArray.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module RandArray {
use ServerErrors;
use Logging;
use Random;
use SegmentedArray;
use SegmentedString;
use ServerErrorStrings;
use MultiTypeSymEntry;
use Map;
Expand Down
6 changes: 3 additions & 3 deletions src/RegistrationMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ module RegistrationMsg
use MultiTypeSymbolTable;
use MultiTypeSymEntry;
use ServerErrorStrings;
use SegmentedArray;
use SegmentedString;

private config const logLevel = ServerConfig.logLevel;
const regLogger = new Logger(logLevel);
Expand Down Expand Up @@ -174,7 +174,7 @@ module RegistrationMsg
}

/*
Compile the component parts of a SegArray attach message
Compile the component parts of a SegString attach message

:arg cmd: calling command
:type cmd: string
Expand All @@ -189,7 +189,7 @@ module RegistrationMsg
*/
proc attachSegArrayMsg(cmd: string, name: string, st: borrowed SymTab): MsgTuple throws {
regLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),
"%s: Collecting SegArray components for '%s'".format(cmd, name));
"%s: Collecting SegString components for '%s'".format(cmd, name));

var repMsg: string;

Expand Down
2 changes: 1 addition & 1 deletion src/SegStringSort.chpl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module SegStringSort {
use SegmentedArray;
use SegmentedString;
use Sort;
use Time;
use IO;
Expand Down
2 changes: 1 addition & 1 deletion src/SegmentedMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ module SegmentedMsg {
use ServerErrors;
use Logging;
use Message;
use SegmentedArray;
use SegmentedString;
use ServerErrorStrings;
use ServerConfig;
use MultiTypeSymbolTable;
Expand Down
4 changes: 2 additions & 2 deletions src/SegmentedArray.chpl → src/SegmentedString.chpl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module SegmentedArray {
module SegmentedString {
use AryUtil;
use CTypes;
use MultiTypeSymbolTable;
Expand All @@ -22,7 +22,7 @@ module SegmentedArray {
const saLogger = new Logger(logLevel);

private config param useHash = true;
param SegmentedArrayUseHash = useHash;
param SegmentedStringUseHash = useHash;

private config param regexMaxCaptures = ServerConfig.regexMaxCaptures;

Expand Down
8 changes: 4 additions & 4 deletions src/ServerErrors.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -117,14 +117,14 @@ module ServerErrors {
}

/*
* The SegArrayError is thrown if the file corresponding to the SegArray lacks either the
* SEGARRAY_OFFSET_NAME or SEGARRAY_VALUE_NAME dataset.
* The SegStringError is thrown if the file corresponding to the SegString lacks either the
* SEGSTRING_OFFSET_NAME or SEGSTRING_VALUE_NAME dataset.
*/
class SegArrayError: ErrorWithContext {
class SegStringError: ErrorWithContext {

proc init(msg : string, lineNumber: int, routineName: string,
moduleName: string) {
super.init(msg,lineNumber,routineName,moduleName,errorClass='SegArrayError');
super.init(msg,lineNumber,routineName,moduleName,errorClass='SegStringError');
}

proc init(){ super.init(); }
Expand Down
Loading