From a66e6a298abe68d291b4da7ce248e7ad97048a07 Mon Sep 17 00:00:00 2001
From: Akansha Maloo <amaloo@google.com>
Date: Fri, 20 Sep 2024 20:17:17 +0000
Subject: [PATCH 1/9] add range_splitter with test

---
 storage/dataflux/fast_list.go           |   3 +-
 storage/dataflux/integration_test.go    |   1 -
 storage/dataflux/range_splitter.go      | 305 ++++++++++++++++++++++-
 storage/dataflux/range_splitter_test.go | 308 ++++++++++++++++++++++++
 4 files changed, 611 insertions(+), 6 deletions(-)
 create mode 100644 storage/dataflux/range_splitter_test.go

diff --git a/storage/dataflux/fast_list.go b/storage/dataflux/fast_list.go
index 45a59758ee0c..68109845a821 100644
--- a/storage/dataflux/fast_list.go
+++ b/storage/dataflux/fast_list.go
@@ -49,8 +49,7 @@ type ListerInput struct {
 	BatchSize int
 
 	// Query is the query to filter objects for listing. Default value is nil. Optional.
-	//Use ProjectionNoACL For faster listing. ACL is expensive and this results in fewer objects
-	// to be returned from GCS in each API call.
+	// Use ProjectionNoACL for faster listing. ACL increases latency while fetching objects
 	Query storage.Query
 
 	// SkipDirectoryObjects is to indicate whether to list directory objects. Default value is false. Optional.
diff --git a/storage/dataflux/integration_test.go b/storage/dataflux/integration_test.go
index 193356dc6c83..099f2c33c2a2 100644
--- a/storage/dataflux/integration_test.go
+++ b/storage/dataflux/integration_test.go
@@ -49,7 +49,6 @@ var (
 
 func TestMain(m *testing.M) {
 	flag.Parse()
-	fmt.Println("creating bucket")
 	if err := httpTestBucket.Create(testPrefix); err != nil {
 		log.Fatalf("test bucket creation failed: %v", err)
 	}
diff --git a/storage/dataflux/range_splitter.go b/storage/dataflux/range_splitter.go
index 4c3564ecc54b..b66412ea9cbf 100644
--- a/storage/dataflux/range_splitter.go
+++ b/storage/dataflux/range_splitter.go
@@ -15,6 +15,9 @@
 package dataflux
 
 import (
+	"fmt"
+	"math/big"
+	"sort"
 	"sync"
 )
 
@@ -31,12 +34,308 @@ type listRange struct {
 	endRange   string
 }
 
+// minimalIntRange specifies start and end range in base-10 form, along with the
+// minimal string length for the split range strings.
+type minimalIntRange struct {
+	startInteger  *big.Int
+	endInteger    *big.Int
+	minimalLength int
+}
+
+// generateSplitsOpts specifies the parameters needed to generate the split
+// range strings.
+type generateSplitsOpts struct {
+	minimalIntRange *minimalIntRange
+	numSplits       int
+	startRange      string
+	endRange        string
+}
+
 // newRangeSplitter creates a new RangeSplitter with the given alphabets.
-func newRangeSplitter(alphabet string) *rangeSplitter {
-	return &rangeSplitter{}
+func newRangeSplitter(alphabet string) (*rangeSplitter, error) {
+
+	// Validate that we do not have empty alphabet passed in.
+	if len(alphabet) == 0 {
+		return nil, fmt.Errorf("no alphabet specified for the range splitter")
+	}
+	// Sort the alphabet lexicographically and store a mapping of each alphabet
+	// to its index. We need a mapping for efficient index lookup in later operations.
+	sortedAlphabet := sortAlphabet([]rune(alphabet))
+	alphabetMap := constructAlphabetMap(sortedAlphabet)
+
+	return &rangeSplitter{
+		alphabetMap:    alphabetMap,
+		sortedAlphabet: sortedAlphabet,
+	}, nil
 }
 
 // splitRange creates a given number of splits based on a provided start and end range.
 func (rs *rangeSplitter) splitRange(startRange, endRange string, numSplits int) ([]string, error) {
-	return nil, nil
+	// Number of splits has to be at least one, otherwise it is not splittable.
+	if numSplits < 1 {
+		return nil, fmt.Errorf("number of splits should be at least 1, got %d", numSplits)
+	}
+
+	// End range (if specified) has to be lexicographically greater than the start range
+	// for the range to be valid.
+	if len(endRange) != 0 && startRange >= endRange {
+		return nil, fmt.Errorf("start range %q cannot be lexicographically greater end range %q", startRange, endRange)
+	}
+
+	rs.addCharsToAlphabet([]rune(startRange))
+	rs.addCharsToAlphabet([]rune(endRange))
+
+	// Validate start range characters and convert into character array form.
+	startRangeCharArray, err := rs.convertRangeStringToArray(startRange)
+	if err != nil {
+		return nil, fmt.Errorf("unable to convert start range %q to array: %v", startRange, err)
+	}
+
+	// Validate end range characters and convert into character array form.
+	endRangeCharArray, err := rs.convertRangeStringToArray(endRange)
+	if err != nil {
+		return nil, fmt.Errorf("unable to convert end range %q to array: %v", endRange, err)
+	}
+
+	// Construct the final split ranges to be returned.
+	var splitPoints []string
+
+	// If the start and end string ranges are equal with padding, no splitting is
+	// necessary. In such cases, an empty array of split ranges is returned.
+	if rs.isRangeEqualWithPadding(startRangeCharArray, endRangeCharArray) {
+		return splitPoints, nil
+	}
+	// Convert the range strings from base-N to base-10 and employ a greedy approach
+	// to determine the smallest splittable integer range difference.
+	minimalIntRange, err := rs.convertStringRangeToMinimalIntRange(
+		startRangeCharArray, endRangeCharArray, numSplits)
+	if err != nil {
+		return nil, fmt.Errorf("range splitting with start range %q and end range %q: %v",
+			startRange, endRange, err)
+	}
+
+	// Generate the split points and return them.
+	splitPoints = rs.generateSplits(generateSplitsOpts{
+		startRange:      startRange,
+		endRange:        endRange,
+		numSplits:       numSplits,
+		minimalIntRange: minimalIntRange,
+	})
+
+	return splitPoints, nil
+}
+
+// generateSplits generates the split points using the specified options.
+func (rs *rangeSplitter) generateSplits(opts generateSplitsOpts) []string {
+
+	startInteger := opts.minimalIntRange.startInteger
+	endInteger := opts.minimalIntRange.endInteger
+	minimalLength := opts.minimalIntRange.minimalLength
+
+	rangeDifference := new(big.Int).Sub(endInteger, startInteger)
+
+	var splitPoints []string
+
+	// The number of intervals is one more than the number of split points.
+	rangeInterval := new(big.Int).SetInt64(int64(opts.numSplits + 1))
+
+	for i := 1; i <= opts.numSplits; i++ {
+		// Combine the range interval and index to determine the split point in base-10 form.
+		rangeDiffWithIdx := new(big.Int).Mul(rangeDifference, big.NewInt(int64(i)))
+		rangeInterval := new(big.Int).Div(rangeDiffWithIdx, rangeInterval)
+		splitPoint := new(big.Int).Add(rangeInterval, startInteger)
+
+		// Convert the split point back from base-10 to base-N.
+		splitString := rs.convertIntToString(splitPoint, minimalLength)
+
+		// Because the minimal int range is derived approximately, validate once more
+		// that each split point falls strictly between the start and end range.
+		isGreaterThanStart := len(splitString) > 0 && splitString > opts.startRange
+		isLessThanEnd := len(opts.endRange) == 0 || (len(splitString) > 0 && splitString < opts.endRange)
+		if isGreaterThanStart && isLessThanEnd {
+			splitPoints = append(splitPoints, splitString)
+		}
+	}
+	return splitPoints
+}
+
+// sortAlphabet sorts the alphabets string lexicographically and returns a pointer to the sorted string.
+func sortAlphabet(unsortedAlphabet []rune) *[]rune {
+	sortedAlphabet := unsortedAlphabet
+	sort.Slice(sortedAlphabet, func(i, j int) bool {
+		return sortedAlphabet[i] < sortedAlphabet[j]
+	})
+	return &sortedAlphabet
+}
+
+// constructAlphabetMap constructs a mapping from each character in the
+// alphabets to its index in the alphabet array.
+func constructAlphabetMap(alphabet *[]rune) map[rune]int {
+	alphabetMap := make(map[rune]int)
+	for i, char := range *alphabet {
+		alphabetMap[char] = i
+	}
+	return alphabetMap
+}
+
+// addCharsToAlphabet adds any characters not yet in the known alphabet, then re-sorts it and rebuilds the index map.
+func (rs *rangeSplitter) addCharsToAlphabet(characters []rune) {
+	rs.mu.Lock()         // Acquire the lock
+	defer rs.mu.Unlock() // Release the lock when the function exits
+	allAlphabet := *rs.sortedAlphabet
+	newChars := false
+	for _, char := range characters {
+		if _, exists := rs.alphabetMap[char]; exists {
+			continue
+		}
+		allAlphabet = append(allAlphabet, char)
+		newChars = true
+		rs.alphabetMap[char] = 0
+	}
+	if newChars {
+		rs.sortedAlphabet = sortAlphabet(allAlphabet)
+		rs.alphabetMap = constructAlphabetMap(rs.sortedAlphabet)
+	}
+}
+
+// isRangeEqualWithPadding checks if two range strings are identical. Equality
+// encompasses any padding using the smallest alphabet character from the set.
+func (rs *rangeSplitter) isRangeEqualWithPadding(startRange, endRange *[]rune) bool {
+
+	sortedAlphabet := rs.sortedAlphabet
+
+	// When the end range is unspecified, it's interpreted as a sequence of the
+	// highest possible characters. Consequently, they are not deemed equal.
+	if len(*endRange) == 0 {
+		return false
+	}
+
+	// Get the longer length of the two range strings.
+	maxLength := len(*startRange)
+	if len(*endRange) > maxLength {
+		maxLength = len(*endRange)
+	}
+
+	smallestChar := (*sortedAlphabet)[0]
+
+	// Loop through the string range.
+	for i := 0; i < maxLength; i++ {
+
+		// In cases where a character is absent at a specific position (due to a length
+		// difference), the position is padded with the smallest character in the alphabet.
+		charStart := charAtOrDefault(startRange, i, smallestChar)
+		charEnd := charAtOrDefault(endRange, i, smallestChar)
+
+		// As soon as we find a difference, we conclude the two strings are different.
+		if charStart != charEnd {
+			return false
+		}
+	}
+	// Otherwise, we conclude the two strings are equal.
+	return true
+}
+
+// charAtOrDefault returns the character at the specified position, or the default character if
+// the position is out of bounds.
+func charAtOrDefault(charArray *[]rune, position int, defaultChar rune) rune {
+	if position < 0 || position >= len(*charArray) {
+		return defaultChar
+	}
+	return (*charArray)[position]
+}
+
+// convertStringRangeToMinimalIntRange gradually extends the start and end string
+// range in base-10 representation, until the difference reaches a threshold
+// suitable for splitting.
+func (rs *rangeSplitter) convertStringRangeToMinimalIntRange(
+	startRange, endRange *[]rune, numSplits int) (*minimalIntRange, error) {
+
+	startInteger := big.NewInt(0)
+	endInteger := big.NewInt(0)
+
+	alphabetLength := len(*rs.sortedAlphabet)
+	startChar := (*rs.sortedAlphabet)[0]
+	endChar := (*rs.sortedAlphabet)[alphabetLength-1]
+
+	endDefaultChar := startChar
+	if len(*endRange) == 0 {
+		endDefaultChar = endChar
+	}
+
+	for i := 0; ; i++ {
+
+		// Convert each character of the start range string into a big integer
+		// based on the alphabet system.
+		startPosition, err := rs.charPosition(charAtOrDefault(startRange, i, startChar))
+		if err != nil {
+			return nil, err
+		}
+		startInteger.Mul(startInteger, big.NewInt(int64(alphabetLength)))
+		startInteger.Add(startInteger, big.NewInt(int64(startPosition)))
+
+		// Convert each character of the end range string into a big integer
+		// based on the alphabet system.
+		endPosition, err := rs.charPosition(charAtOrDefault(endRange, i, endDefaultChar))
+		if err != nil {
+			return nil, err
+		}
+		endInteger.Mul(endInteger, big.NewInt(int64(alphabetLength)))
+		endInteger.Add(endInteger, big.NewInt(int64(endPosition)))
+
+		// Calculate the difference between the start and end range in big integer representation.
+		difference := new(big.Int).Sub(endInteger, startInteger)
+
+		// If the difference is bigger than the number of split points, we are done.
+		// In particular, the minimal length is one greater than the index (due to zero indexing).
+		if difference.Cmp(big.NewInt(int64(numSplits))) > 0 {
+			return &minimalIntRange{
+				startInteger:  startInteger,
+				endInteger:    endInteger,
+				minimalLength: i + 1,
+			}, nil
+		}
+	}
+}
+
+// charPosition returns the index of the character in the alphabet set.
+func (rs *rangeSplitter) charPosition(ch rune) (int, error) {
+	if idx, ok := rs.alphabetMap[ch]; ok {
+		return idx, nil
+	}
+	return -1, fmt.Errorf("character %c is not found in the alphabet map %v", ch, rs.alphabetMap)
+}
+
+// convertRangeStringToArray transforms the range string into a rune slice while
+// verifying the presence of each character in the alphabets.
+func (rs *rangeSplitter) convertRangeStringToArray(rangeString string) (*[]rune, error) {
+	for _, char := range rangeString {
+		if _, exists := rs.alphabetMap[char]; !exists {
+			return nil, fmt.Errorf("character %c in range string %q is not found in the alphabet array", char, rangeString)
+		}
+	}
+	characterArray := []rune(rangeString)
+	return &characterArray, nil
+}
+
+// convertIntToString converts the split point from base-10 to base-N.
+func (rs *rangeSplitter) convertIntToString(splitPoint *big.Int, stringLength int) string {
+
+	remainder := new(big.Int)
+
+	var splitChar []rune
+	alphabetSize := big.NewInt(int64(len(*rs.sortedAlphabet)))
+
+	// Iterate through the split point and convert alphabet by alphabet.
+	for i := 0; i < stringLength; i++ {
+		remainder.Mod(splitPoint, alphabetSize)
+		splitPoint.Div(splitPoint, alphabetSize)
+		splitChar = append(splitChar, (*rs.sortedAlphabet)[(int)(remainder.Int64())])
+	}
+
+	// Reverse the converted alphabet order because we originally processed from right to left.
+	for i, j := 0, len(splitChar)-1; i < j; i, j = i+1, j-1 {
+		splitChar[i], splitChar[j] = splitChar[j], splitChar[i]
+	}
+
+	return string(splitChar)
 }
diff --git a/storage/dataflux/range_splitter_test.go b/storage/dataflux/range_splitter_test.go
new file mode 100644
index 000000000000..c697c3483b62
--- /dev/null
+++ b/storage/dataflux/range_splitter_test.go
@@ -0,0 +1,308 @@
+package dataflux
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+)
+
+func TestNewRangeSplitter(t *testing.T) {
+	testCases := []struct {
+		desc     string
+		alphabet string
+		wantErr  bool
+	}{
+		{
+			desc:     "Valid alphabet",
+			alphabet: "0123456789",
+			wantErr:  false,
+		},
+		{
+			desc:     "Empty alphabet",
+			alphabet: "",
+			wantErr:  true,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.desc, func(t *testing.T) {
+			_, gotErr := newRangeSplitter(tc.alphabet)
+			if (gotErr != nil) != tc.wantErr {
+				t.Errorf("NewRangeSplitter(%q) got error = %v, want error = %v", tc.alphabet, gotErr, tc.wantErr)
+			}
+		})
+	}
+}
+
+func TestSplitRange(t *testing.T) {
+
+	testAlphabet := "0123456789"
+
+	// We use the numbers as the base alphabet for testing purposes.
+	rangeSplitter, err := newRangeSplitter(testAlphabet)
+	if err != nil {
+		t.Fatalf("NewRangeSplitter(%q) got error = %v, want error = nil", testAlphabet, err)
+	}
+
+	testCases := []struct {
+		desc            string
+		startRange      string
+		endRange        string
+		numSplits       int
+		wantErr         bool
+		wantSplitPoints []string
+	}{
+		// Tests for checking invalid arguments are properly handled.
+		{
+			desc:            "Number of Splits Less Than One",
+			startRange:      "123",
+			endRange:        "456",
+			numSplits:       0,
+			wantErr:         true,
+			wantSplitPoints: nil,
+		},
+		{
+			desc:            "End Range Lexicographically Smaller Than Start Range",
+			startRange:      "456",
+			endRange:        "123",
+			numSplits:       2,
+			wantErr:         true,
+			wantSplitPoints: nil,
+		},
+		// Test for unsplittable cases.
+		{
+			desc:            "Unsplittable with Empty Start Range",
+			startRange:      "",
+			endRange:        "0",
+			numSplits:       100,
+			wantErr:         false,
+			wantSplitPoints: nil,
+		},
+		{
+			desc:            "Unsplittable with Non Empty Ranges",
+			startRange:      "9",
+			endRange:        "90",
+			numSplits:       100,
+			wantErr:         false,
+			wantSplitPoints: nil,
+		},
+		// Test for splittable cases.
+		{
+			desc:            "Split Entire Bucket Namespace",
+			startRange:      "",
+			endRange:        "",
+			numSplits:       24,
+			wantErr:         false,
+			wantSplitPoints: []string{"03", "07", "11", "15", "19", "23", "27", "31", "35", "39", "43", "47", "51", "55", "59", "63", "67", "71", "75", "79", "83", "87", "91", "95"},
+		},
+		{
+			desc:            "Split with Only Start Range",
+			startRange:      "5555",
+			endRange:        "",
+			numSplits:       4,
+			wantErr:         false,
+			wantSplitPoints: []string{"63", "72", "81", "90"},
+		},
+		{
+			desc:            "Split Large Distance with Few Split Points",
+			startRange:      "0",
+			endRange:        "9",
+			numSplits:       3,
+			wantErr:         false,
+			wantSplitPoints: []string{"2", "4", "6"},
+		},
+		{
+			desc:            "Split with Prefix, Distance at Index 5 > 1",
+			startRange:      "0123455111",
+			endRange:        "012347",
+			numSplits:       1,
+			wantErr:         false,
+			wantSplitPoints: []string{"012346"},
+		},
+		{
+			desc:            "Split with Prefix, Distance at Index 6 > 1",
+			startRange:      "00005699",
+			endRange:        "00006",
+			numSplits:       3,
+			wantErr:         false,
+			wantSplitPoints: []string{"000057", "000058", "000059"},
+		},
+		{
+			desc:            "Split into Half with Small Range",
+			startRange:      "199999",
+			endRange:        "2",
+			numSplits:       1,
+			wantErr:         false,
+			wantSplitPoints: []string{"1999995"},
+		},
+		{
+			desc:            "Split into Multuple Pieces with Small Range",
+			startRange:      "011",
+			endRange:        "022",
+			numSplits:       5,
+			wantErr:         false,
+			wantSplitPoints: []string{"012", "014", "016", "018", "020"},
+		},
+		{
+			desc:            "Split towards End Range",
+			startRange:      "8999",
+			endRange:        "",
+			numSplits:       4,
+			wantErr:         false,
+			wantSplitPoints: []string{"91", "93", "95", "97"},
+		},
+		{
+			desc:            "Split with Sequence of Adjacent Characters",
+			startRange:      "12345",
+			endRange:        "23456",
+			numSplits:       4,
+			wantErr:         false,
+			wantSplitPoints: []string{"14", "16", "18", "20"},
+		},
+		{
+			desc:            "Split into Adjenct Split Points",
+			startRange:      "0999998",
+			endRange:        "1000002",
+			numSplits:       3,
+			wantErr:         false,
+			wantSplitPoints: []string{"0999999", "1000000", "1000001"},
+		},
+		{
+			desc:            "End Range Contains new Character",
+			startRange:      "123",
+			endRange:        "xyz",
+			numSplits:       2,
+			wantErr:         false,
+			wantSplitPoints: []string{"4", "7"},
+		},
+		{
+			desc:            "Start Range Contains new Character",
+			startRange:      "abc",
+			endRange:        "xyz",
+			numSplits:       2,
+			wantErr:         false,
+			wantSplitPoints: []string{"b", "c"},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.desc, func(t *testing.T) {
+			gotSplitPoints, gotErr := rangeSplitter.splitRange(tc.startRange, tc.endRange, tc.numSplits)
+			if (gotErr != nil) != tc.wantErr {
+				t.Errorf("SplitRange(%q, %q, %d) got error = %v, want error = %v",
+					tc.startRange, tc.endRange, tc.numSplits, gotErr, tc.wantErr)
+			}
+
+			if diff := cmp.Diff(tc.wantSplitPoints, gotSplitPoints); diff != "" {
+				t.Errorf("SplitRange(%q, %q, %d) returned unexpected diff (-want +got):\n%s",
+					tc.startRange, tc.endRange, tc.numSplits, diff)
+			}
+		})
+	}
+}
+
+func TestSortAlphabet(t *testing.T) {
+	testCases := []struct {
+		desc             string
+		unsortedAlphabet []rune
+		wantAphabet      *[]rune
+	}{
+		{
+			desc:             "unsorted array",
+			unsortedAlphabet: []rune{'8', '9', '7'},
+			wantAphabet:      &[]rune{'7', '8', '9'},
+		},
+		{
+			desc:             "one alphabet",
+			unsortedAlphabet: []rune{'7'},
+			wantAphabet:      &[]rune{'7'},
+		},
+		{
+			desc:             "empty array",
+			unsortedAlphabet: []rune{},
+			wantAphabet:      &[]rune{},
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.desc, func(t *testing.T) {
+			got := sortAlphabet(tc.unsortedAlphabet)
+			if diff := cmp.Diff(tc.wantAphabet, got); diff != "" {
+				t.Errorf("sortAlphabet(%q) returned unexpected diff (-want +got):\n%s", tc.unsortedAlphabet, diff)
+			}
+		})
+	}
+}
+
+func TestConstructAlphabetMap(t *testing.T) {
+	testCases := []struct {
+		desc           string
+		sortedAlphabet *[]rune
+		wantMap        map[rune]int
+	}{
+		{
+			desc:           "sorted array",
+			sortedAlphabet: &[]rune{'7', '8', '9'},
+			wantMap:        map[rune]int{'7': 0, '8': 1, '9': 2},
+		},
+		{
+			desc:           "unsorted array",
+			sortedAlphabet: &[]rune{'7', '9', '8'},
+			wantMap:        map[rune]int{'7': 0, '9': 1, '8': 2},
+		},
+		{
+			desc:           "one alphabet",
+			sortedAlphabet: &[]rune{'7'},
+			wantMap:        map[rune]int{'7': 0},
+		},
+		{
+			desc:           "empty array",
+			sortedAlphabet: &[]rune{},
+			wantMap:        map[rune]int{},
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.desc, func(t *testing.T) {
+			got := constructAlphabetMap(tc.sortedAlphabet)
+			if diff := cmp.Diff(tc.wantMap, got); diff != "" {
+				t.Errorf("constructAlphabetMap(%q) returned unexpected diff (-want +got):\n%s", tc.sortedAlphabet, diff)
+			}
+		})
+	}
+}
+
+func TestCharPosition(t *testing.T) {
+	testCases := []struct {
+		desc      string
+		character rune
+		wantErr   bool
+		wantPos   int
+	}{
+		{
+			desc:      "no error",
+			character: '7',
+			wantErr:   false,
+			wantPos:   0,
+		},
+		{
+			desc:      "character not present",
+			character: '6',
+			wantErr:   true,
+			wantPos:   -1,
+		},
+	}
+	rs, err := newRangeSplitter("78898")
+	if err != nil {
+		t.Fatalf("Failed to initialize range splitter, err: %v", err)
+	}
+	for _, tc := range testCases {
+		t.Run(tc.desc, func(t *testing.T) {
+			got, err := rs.charPosition(tc.character)
+			if (err != nil) != tc.wantErr {
+				t.Errorf("charPosition(%q) got error = %v, want error = %v", tc.character, err, tc.wantErr)
+			}
+			if got != tc.wantPos {
+				t.Errorf("charPosition(%q) got = %v, want = %v", tc.character, got, tc.wantPos)
+			}
+		})
+	}
+}

From 438028f265f31382024123077601a6c364b6862e Mon Sep 17 00:00:00 2001
From: Akansha Maloo <amaloo@google.com>
Date: Fri, 20 Sep 2024 20:43:45 +0000
Subject: [PATCH 2/9] add license on range_splitter_test

---
 storage/dataflux/range_splitter_test.go | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/storage/dataflux/range_splitter_test.go b/storage/dataflux/range_splitter_test.go
index c697c3483b62..ecb60997b8ff 100644
--- a/storage/dataflux/range_splitter_test.go
+++ b/storage/dataflux/range_splitter_test.go
@@ -1,3 +1,17 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 package dataflux
 
 import (

From de75ff2069e313881009350f42a217bd0dbf9be1 Mon Sep 17 00:00:00 2001
From: Akansha Maloo <amaloo@google.com>
Date: Mon, 23 Sep 2024 21:36:41 +0000
Subject: [PATCH 3/9] remove pointer from rune

---
 storage/dataflux/fast_list.go           |  2 +-
 storage/dataflux/range_splitter.go      | 80 ++++++++++++++-----------
 storage/dataflux/range_splitter_test.go | 72 +++++++++++++---------
 3 files changed, 91 insertions(+), 63 deletions(-)

diff --git a/storage/dataflux/fast_list.go b/storage/dataflux/fast_list.go
index 68109845a821..08aacd959ae7 100644
--- a/storage/dataflux/fast_list.go
+++ b/storage/dataflux/fast_list.go
@@ -49,7 +49,7 @@ type ListerInput struct {
 	BatchSize int
 
 	// Query is the query to filter objects for listing. Default value is nil. Optional.
-	// Use ProjectionNoACL for faster listing. ACL increases latency while fetching objects
+	// Use ProjectionNoACL for faster listing. Including ACLs increases latency while fetching objects.
 	Query storage.Query
 
 	// SkipDirectoryObjects is to indicate whether to list directory objects. Default value is false. Optional.
diff --git a/storage/dataflux/range_splitter.go b/storage/dataflux/range_splitter.go
index b66412ea9cbf..6bef53a78372 100644
--- a/storage/dataflux/range_splitter.go
+++ b/storage/dataflux/range_splitter.go
@@ -24,7 +24,7 @@ import (
 // rangeSplitter specifies the a list and a map of sorted alphabets.
 type rangeSplitter struct {
 	mu             sync.Mutex
-	sortedAlphabet *[]rune
+	sortedAlphabet []rune
 	alphabetMap    map[rune]int
 }
 
@@ -52,6 +52,15 @@ type generateSplitsOpts struct {
 }
 
 // newRangeSplitter creates a new RangeSplitter with the given alphabets.
+// RangeSplitter determines split points within a given range based on the given
+// alphabets. This process involves translating the start and end range strings
+// into base-10 integers, performing a split within the integer domain, and then
+// converting the splits back into strings. In essence, this operation resembles
+// a base-N to base-10 conversion, followed by a split in base 10, and finally
+// another base-10 to base-N conversion. In this scenario, N represents the size
+// of the alphabet, with the character's position in the alphabet indicating the
+// digit's value. Characters in a start or end range that are missing from the
+// alphabet are added to it when splitRange is called.
 func newRangeSplitter(alphabet string) (*rangeSplitter, error) {
 
 	// Validate that we do not have empty alphabet passed in.
@@ -60,7 +69,8 @@ func newRangeSplitter(alphabet string) (*rangeSplitter, error) {
 	}
 	// Sort the alphabet lexicographically and store a mapping of each alphabet
 	// to its index. We need a mapping for efficient index lookup in later operations.
-	sortedAlphabet := sortAlphabet([]rune(alphabet))
+	sortedAlphabet := []rune(alphabet)
+	sortAlphabet(sortedAlphabet)
 	alphabetMap := constructAlphabetMap(sortedAlphabet)
 
 	return &rangeSplitter{
@@ -69,7 +79,14 @@ func newRangeSplitter(alphabet string) (*rangeSplitter, error) {
 	}, nil
 }
 
-// splitRange creates a given number of splits based on a provided start and end range.
+// splitRange divides the provided start and end range into approximately equal
+// subranges, returning the split points. An empty slice is returned if suitable
+// splits cannot be determined. Please note that this method provides a rough
+// estimate of split points, without ensuring precise even partitioning of the range.
+// Additionally, the number of found splits might be fewer than requested if the
+// algorithm struggles to find sufficient split points. However, if both the start
+// and end ranges are empty strings (indicating the entire namespace), the algorithm
+// guarantees the requested number of split points is returned.
 func (rs *rangeSplitter) splitRange(startRange, endRange string, numSplits int) ([]string, error) {
 	// Number of splits has to be at least one, otherwise it is not splittable.
 	if numSplits < 1 {
@@ -79,7 +96,7 @@ func (rs *rangeSplitter) splitRange(startRange, endRange string, numSplits int)
 	// End range (if specified) has to be lexicographically greater than the start range
 	// for the range to be valid.
 	if len(endRange) != 0 && startRange >= endRange {
-		return nil, fmt.Errorf("start range %q cannot be lexicographically greater end range %q", startRange, endRange)
+		return nil, fmt.Errorf("start range %q cannot be lexicographically greater than end range %q", startRange, endRange)
 	}
 
 	rs.addCharsToAlphabet([]rune(startRange))
@@ -159,20 +176,18 @@ func (rs *rangeSplitter) generateSplits(opts generateSplitsOpts) []string {
 	return splitPoints
 }
 
-// sortAlphabet sorts the alphabets string lexicographically and returns a pointer to the sorted string.
-func sortAlphabet(unsortedAlphabet []rune) *[]rune {
-	sortedAlphabet := unsortedAlphabet
-	sort.Slice(sortedAlphabet, func(i, j int) bool {
-		return sortedAlphabet[i] < sortedAlphabet[j]
+// sortAlphabet sorts the alphabet runes in place, in ascending order.
+func sortAlphabet(unsortedAlphabet []rune) {
+	sort.Slice(unsortedAlphabet, func(i, j int) bool {
+		return unsortedAlphabet[i] < unsortedAlphabet[j]
 	})
-	return &sortedAlphabet
 }
 
 // constructAlphabetMap constructs a mapping from each character in the
 // alphabets to its index in the alphabet array.
-func constructAlphabetMap(alphabet *[]rune) map[rune]int {
+func constructAlphabetMap(alphabet []rune) map[rune]int {
 	alphabetMap := make(map[rune]int)
-	for i, char := range *alphabet {
+	for i, char := range alphabet {
 		alphabetMap[char] = i
 	}
 	return alphabetMap
@@ -182,7 +197,7 @@ func constructAlphabetMap(alphabet *[]rune) map[rune]int {
 func (rs *rangeSplitter) addCharsToAlphabet(characters []rune) {
 	rs.mu.Lock()         // Acquire the lock
 	defer rs.mu.Unlock() // Release the lock when the function exits
-	allAlphabet := *rs.sortedAlphabet
+	allAlphabet := rs.sortedAlphabet
 	newChars := false
 	for _, char := range characters {
 		if _, exists := rs.alphabetMap[char]; exists {
@@ -190,33 +205,30 @@ func (rs *rangeSplitter) addCharsToAlphabet(characters []rune) {
 		}
 		allAlphabet = append(allAlphabet, char)
 		newChars = true
-		rs.alphabetMap[char] = 0
 	}
 	if newChars {
-		rs.sortedAlphabet = sortAlphabet(allAlphabet)
+		sortAlphabet(allAlphabet)
+		rs.sortedAlphabet = allAlphabet
 		rs.alphabetMap = constructAlphabetMap(rs.sortedAlphabet)
 	}
 }
 
 // isRangeEqualWithPadding checks if two range strings are identical. Equality
 // encompasses any padding using the smallest alphabet character from the set.
-func (rs *rangeSplitter) isRangeEqualWithPadding(startRange, endRange *[]rune) bool {
+func (rs *rangeSplitter) isRangeEqualWithPadding(startRange, endRange []rune) bool {
 
 	sortedAlphabet := rs.sortedAlphabet
 
 	// When the end range is unspecified, it's interpreted as a sequence of the
 	// highest possible characters. Consequently, they are not deemed equal.
-	if len(*endRange) == 0 {
+	if len(endRange) == 0 {
 		return false
 	}
 
 	// Get the longer length of the two range strings.
-	maxLength := len(*startRange)
-	if len(*endRange) > maxLength {
-		maxLength = len(*endRange)
-	}
+	maxLength := max(len(startRange), len(endRange))
 
-	smallestChar := (*sortedAlphabet)[0]
+	smallestChar := sortedAlphabet[0]
 
 	// Loop through the string range.
 	for i := 0; i < maxLength; i++ {
@@ -237,28 +249,28 @@ func (rs *rangeSplitter) isRangeEqualWithPadding(startRange, endRange *[]rune) b
 
 // charAtOrDefault returns the character at the specified position, or the default character if
 // the position is out of bounds.
-func charAtOrDefault(charArray *[]rune, position int, defaultChar rune) rune {
-	if position < 0 || position >= len(*charArray) {
+func charAtOrDefault(charArray []rune, position int, defaultChar rune) rune {
+	if position < 0 || position >= len(charArray) {
 		return defaultChar
 	}
-	return (*charArray)[position]
+	return (charArray)[position]
 }
 
 // convertStringRangeToMinimalIntRange gradually extends the start and end string
 // range in base-10 representation, until the difference reaches a threshold
 // suitable for splitting.
 func (rs *rangeSplitter) convertStringRangeToMinimalIntRange(
-	startRange, endRange *[]rune, numSplits int) (*minimalIntRange, error) {
+	startRange, endRange []rune, numSplits int) (*minimalIntRange, error) {
 
 	startInteger := big.NewInt(0)
 	endInteger := big.NewInt(0)
 
-	alphabetLength := len(*rs.sortedAlphabet)
-	startChar := (*rs.sortedAlphabet)[0]
-	endChar := (*rs.sortedAlphabet)[alphabetLength-1]
+	alphabetLength := len(rs.sortedAlphabet)
+	startChar := (rs.sortedAlphabet)[0]
+	endChar := (rs.sortedAlphabet)[alphabetLength-1]
 
 	endDefaultChar := startChar
-	if len(*endRange) == 0 {
+	if len(endRange) == 0 {
 		endDefaultChar = endChar
 	}
 
@@ -307,14 +319,14 @@ func (rs *rangeSplitter) charPosition(ch rune) (int, error) {
 
 // convertRangeStringToArray transforms the range string into a rune slice while
 // verifying the presence of each character in the alphabets.
-func (rs *rangeSplitter) convertRangeStringToArray(rangeString string) (*[]rune, error) {
+func (rs *rangeSplitter) convertRangeStringToArray(rangeString string) ([]rune, error) {
 	for _, char := range rangeString {
 		if _, exists := rs.alphabetMap[char]; !exists {
 			return nil, fmt.Errorf("character %c in range string %q is not found in the alphabet array", char, rangeString)
 		}
 	}
 	characterArray := []rune(rangeString)
-	return &characterArray, nil
+	return characterArray, nil
 }
 
 // convertIntToString converts the split point from base-10 to base-N.
@@ -323,13 +335,13 @@ func (rs *rangeSplitter) convertIntToString(splitPoint *big.Int, stringLength in
 	remainder := new(big.Int)
 
 	var splitChar []rune
-	alphabetSize := big.NewInt(int64(len(*rs.sortedAlphabet)))
+	alphabetSize := big.NewInt(int64(len(rs.sortedAlphabet)))
 
 	// Iterate through the split point and convert alphabet by alphabet.
 	for i := 0; i < stringLength; i++ {
 		remainder.Mod(splitPoint, alphabetSize)
 		splitPoint.Div(splitPoint, alphabetSize)
-		splitChar = append(splitChar, (*rs.sortedAlphabet)[(int)(remainder.Int64())])
+		splitChar = append(splitChar, (rs.sortedAlphabet)[(int)(remainder.Int64())])
 	}
 
 	// Reverse the converted alphabet order because we originally processed from right to left.
diff --git a/storage/dataflux/range_splitter_test.go b/storage/dataflux/range_splitter_test.go
index ecb60997b8ff..e5b673700434 100644
--- a/storage/dataflux/range_splitter_test.go
+++ b/storage/dataflux/range_splitter_test.go
@@ -27,12 +27,12 @@ func TestNewRangeSplitter(t *testing.T) {
 		wantErr  bool
 	}{
 		{
-			desc:     "Valid alphabet",
+			desc:     "valid alphabet",
 			alphabet: "0123456789",
 			wantErr:  false,
 		},
 		{
-			desc:     "Empty alphabet",
+			desc:     "empty alphabet",
 			alphabet: "",
 			wantErr:  true,
 		},
@@ -66,9 +66,25 @@ func TestSplitRange(t *testing.T) {
 		wantErr         bool
 		wantSplitPoints []string
 	}{
+		{
+			desc:            "empty start",
+			startRange:      "",
+			endRange:        "9",
+			numSplits:       2,
+			wantErr:         false,
+			wantSplitPoints: []string{"3", "6"},
+		},
+		{
+			desc:            "empty end",
+			startRange:      "0",
+			endRange:        "",
+			numSplits:       2,
+			wantErr:         false,
+			wantSplitPoints: []string{"3", "6"},
+		},
 		// Tests for checking invalid arguments are properly handled.
 		{
-			desc:            "Number of Splits Less Than One",
+			desc:            "splits less than one",
 			startRange:      "123",
 			endRange:        "456",
 			numSplits:       0,
@@ -76,7 +92,7 @@ func TestSplitRange(t *testing.T) {
 			wantSplitPoints: nil,
 		},
 		{
-			desc:            "End Range Lexicographically Smaller Than Start Range",
+			desc:            "end range lexicographically smaller than start range",
 			startRange:      "456",
 			endRange:        "123",
 			numSplits:       2,
@@ -85,7 +101,7 @@ func TestSplitRange(t *testing.T) {
 		},
 		// Test for unsplittable cases.
 		{
-			desc:            "Unsplittable with Empty Start Range",
+			desc:            "unsplittable with empty start range",
 			startRange:      "",
 			endRange:        "0",
 			numSplits:       100,
@@ -93,7 +109,7 @@ func TestSplitRange(t *testing.T) {
 			wantSplitPoints: nil,
 		},
 		{
-			desc:            "Unsplittable with Non Empty Ranges",
+			desc:            "unsplittable with non empty ranges",
 			startRange:      "9",
 			endRange:        "90",
 			numSplits:       100,
@@ -110,7 +126,7 @@ func TestSplitRange(t *testing.T) {
 			wantSplitPoints: []string{"03", "07", "11", "15", "19", "23", "27", "31", "35", "39", "43", "47", "51", "55", "59", "63", "67", "71", "75", "79", "83", "87", "91", "95"},
 		},
 		{
-			desc:            "Split with Only Start Range",
+			desc:            "split with only start range",
 			startRange:      "5555",
 			endRange:        "",
 			numSplits:       4,
@@ -118,7 +134,7 @@ func TestSplitRange(t *testing.T) {
 			wantSplitPoints: []string{"63", "72", "81", "90"},
 		},
 		{
-			desc:            "Split Large Distance with Few Split Points",
+			desc:            "split large distance with few split points",
 			startRange:      "0",
 			endRange:        "9",
 			numSplits:       3,
@@ -126,7 +142,7 @@ func TestSplitRange(t *testing.T) {
 			wantSplitPoints: []string{"2", "4", "6"},
 		},
 		{
-			desc:            "Split with Prefix, Distance at Index 5 > 1",
+			desc:            "split with prefix, distance at index 5 > 1",
 			startRange:      "0123455111",
 			endRange:        "012347",
 			numSplits:       1,
@@ -134,7 +150,7 @@ func TestSplitRange(t *testing.T) {
 			wantSplitPoints: []string{"012346"},
 		},
 		{
-			desc:            "Split with Prefix, Distance at Index 6 > 1",
+			desc:            "split with prefix, distance at index 6 > 1",
 			startRange:      "00005699",
 			endRange:        "00006",
 			numSplits:       3,
@@ -142,7 +158,7 @@ func TestSplitRange(t *testing.T) {
 			wantSplitPoints: []string{"000057", "000058", "000059"},
 		},
 		{
-			desc:            "Split into Half with Small Range",
+			desc:            "split into half with small range",
 			startRange:      "199999",
 			endRange:        "2",
 			numSplits:       1,
@@ -150,7 +166,7 @@ func TestSplitRange(t *testing.T) {
 			wantSplitPoints: []string{"1999995"},
 		},
 		{
-			desc:            "Split into Multuple Pieces with Small Range",
+			desc:            "split into multuple pieces with small range",
 			startRange:      "011",
 			endRange:        "022",
 			numSplits:       5,
@@ -158,7 +174,7 @@ func TestSplitRange(t *testing.T) {
 			wantSplitPoints: []string{"012", "014", "016", "018", "020"},
 		},
 		{
-			desc:            "Split towards End Range",
+			desc:            "split towards end range",
 			startRange:      "8999",
 			endRange:        "",
 			numSplits:       4,
@@ -166,7 +182,7 @@ func TestSplitRange(t *testing.T) {
 			wantSplitPoints: []string{"91", "93", "95", "97"},
 		},
 		{
-			desc:            "Split with Sequence of Adjacent Characters",
+			desc:            "split with sequence of adjacent characters",
 			startRange:      "12345",
 			endRange:        "23456",
 			numSplits:       4,
@@ -174,7 +190,7 @@ func TestSplitRange(t *testing.T) {
 			wantSplitPoints: []string{"14", "16", "18", "20"},
 		},
 		{
-			desc:            "Split into Adjenct Split Points",
+			desc:            "split into adjenct split points",
 			startRange:      "0999998",
 			endRange:        "1000002",
 			numSplits:       3,
@@ -182,7 +198,7 @@ func TestSplitRange(t *testing.T) {
 			wantSplitPoints: []string{"0999999", "1000000", "1000001"},
 		},
 		{
-			desc:            "End Range Contains new Character",
+			desc:            "end range contains new character",
 			startRange:      "123",
 			endRange:        "xyz",
 			numSplits:       2,
@@ -190,7 +206,7 @@ func TestSplitRange(t *testing.T) {
 			wantSplitPoints: []string{"4", "7"},
 		},
 		{
-			desc:            "Start Range Contains new Character",
+			desc:            "start range contains new character",
 			startRange:      "abc",
 			endRange:        "xyz",
 			numSplits:       2,
@@ -219,28 +235,28 @@ func TestSortAlphabet(t *testing.T) {
 	testCases := []struct {
 		desc             string
 		unsortedAlphabet []rune
-		wantAphabet      *[]rune
+		wantAphabet      []rune
 	}{
 		{
 			desc:             "unsorted array",
 			unsortedAlphabet: []rune{'8', '9', '7'},
-			wantAphabet:      &[]rune{'7', '8', '9'},
+			wantAphabet:      []rune{'7', '8', '9'},
 		},
 		{
 			desc:             "one alphabet",
 			unsortedAlphabet: []rune{'7'},
-			wantAphabet:      &[]rune{'7'},
+			wantAphabet:      []rune{'7'},
 		},
 		{
 			desc:             "empty array",
 			unsortedAlphabet: []rune{},
-			wantAphabet:      &[]rune{},
+			wantAphabet:      []rune{},
 		},
 	}
 	for _, tc := range testCases {
 		t.Run(tc.desc, func(t *testing.T) {
-			got := sortAlphabet(tc.unsortedAlphabet)
-			if diff := cmp.Diff(tc.wantAphabet, got); diff != "" {
+			sortAlphabet(tc.unsortedAlphabet)
+			if diff := cmp.Diff(tc.wantAphabet, tc.unsortedAlphabet); diff != "" {
 				t.Errorf("sortAlphabet(%q) returned unexpected diff (-want +got):\n%s", tc.unsortedAlphabet, diff)
 			}
 		})
@@ -250,27 +266,27 @@ func TestSortAlphabet(t *testing.T) {
 func TestConstructAlphabetMap(t *testing.T) {
 	testCases := []struct {
 		desc           string
-		sortedAlphabet *[]rune
+		sortedAlphabet []rune
 		wantMap        map[rune]int
 	}{
 		{
 			desc:           "sorted array",
-			sortedAlphabet: &[]rune{'7', '8', '9'},
+			sortedAlphabet: []rune{'7', '8', '9'},
 			wantMap:        map[rune]int{'7': 0, '8': 1, '9': 2},
 		},
 		{
 			desc:           "unsorted array",
-			sortedAlphabet: &[]rune{'7', '9', '8'},
+			sortedAlphabet: []rune{'7', '9', '8'},
 			wantMap:        map[rune]int{'7': 0, '9': 1, '8': 2},
 		},
 		{
 			desc:           "one alphabet",
-			sortedAlphabet: &[]rune{'7'},
+			sortedAlphabet: []rune{'7'},
 			wantMap:        map[rune]int{'7': 0},
 		},
 		{
 			desc:           "empty array",
-			sortedAlphabet: &[]rune{},
+			sortedAlphabet: []rune{},
 			wantMap:        map[rune]int{},
 		},
 	}

From 616dd0fd02e188986d79124fc1b56f1c3150752c Mon Sep 17 00:00:00 2001
From: Akansha Maloo <amaloo@google.com>
Date: Mon, 23 Sep 2024 21:41:08 +0000
Subject: [PATCH 4/9] simplify if statement

---
 storage/dataflux/range_splitter.go | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/storage/dataflux/range_splitter.go b/storage/dataflux/range_splitter.go
index 6bef53a78372..7e06d95c66fb 100644
--- a/storage/dataflux/range_splitter.go
+++ b/storage/dataflux/range_splitter.go
@@ -200,11 +200,10 @@ func (rs *rangeSplitter) addCharsToAlphabet(characters []rune) {
 	allAlphabet := rs.sortedAlphabet
 	newChars := false
 	for _, char := range characters {
-		if _, exists := rs.alphabetMap[char]; exists {
-			continue
+		if _, exists := rs.alphabetMap[char]; !exists {
+			allAlphabet = append(allAlphabet, char)
+			newChars = true
 		}
-		allAlphabet = append(allAlphabet, char)
-		newChars = true
 	}
 	if newChars {
 		sortAlphabet(allAlphabet)

From 04e85d7b904e35b370907caf57651a1034be2b5f Mon Sep 17 00:00:00 2001
From: Akansha Maloo <amaloo@google.com>
Date: Tue, 24 Sep 2024 01:53:48 +0000
Subject: [PATCH 5/9] update comments and add test case for empty end

---
 storage/dataflux/range_splitter.go      | 25 +++++++++++++------------
 storage/dataflux/range_splitter_test.go |  8 ++++++++
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/storage/dataflux/range_splitter.go b/storage/dataflux/range_splitter.go
index 7e06d95c66fb..737576e5b408 100644
--- a/storage/dataflux/range_splitter.go
+++ b/storage/dataflux/range_splitter.go
@@ -53,14 +53,7 @@ type generateSplitsOpts struct {
 
 // newRangeSplitter creates a new RangeSplitter with the given alphabets.
 // RangeSplitter determines split points within a given range based on the given
-// alphabets. This process involves translating the start and end range strings
-// into base-10 integers, performing a split within the integer domain, and then
-// converting the splits back into strings. In essence, this operation resembles
-// a base-N to base-10 conversion, followed by a split in base 10, and finally
-// another base-10 to base-N conversion. In this scenario, N represents the size
-// of the alphabet, with the character's position in the alphabet indicating the
-// digit's value. As of now, the range splitter exclusively supports only the
-// provided alphabets.
+// alphabets.
 func newRangeSplitter(alphabet string) (*rangeSplitter, error) {
 
 	// Validate that we do not have empty alphabet passed in.
@@ -84,9 +77,9 @@ func newRangeSplitter(alphabet string) (*rangeSplitter, error) {
 // splits cannot be determined. Please note that this method provides a rough
 // estimate of split points, without ensuring precise even partitioning of the range.
 // Additionally, the number of found splits might be fewer than requested if the
-// algorithm struggles to find sufficient split points. However, if both the start
-// and end ranges are empty strings (indicating the entire namespace), the algorithm
-// guarantees the requested number of split points is returned.
+// algorithm struggles to find sufficient split points. If the start range is empty
+// the algorithm assumes it to be sequence of smallest possible character and empty
+// end range as sequence of highest possible characters.
 func (rs *rangeSplitter) splitRange(startRange, endRange string, numSplits int) ([]string, error) {
 	// Number of splits has to be at least one, otherwise it is not splittable.
 	if numSplits < 1 {
@@ -142,7 +135,13 @@ func (rs *rangeSplitter) splitRange(startRange, endRange string, numSplits int)
 	return splitPoints, nil
 }
 
-// generateSplits generates the split points using the specified options.
+// generateSplits generates the split points by translating the start and end
+// range strings into base-10 integers, performing a split within the integer
+// domain, and then converting the splits back into strings. In essence, this
+// operation resembles a base-N to base-10 conversion, followed by a split in
+// base 10, and finally another base-10 to base-N conversion. In this scenario,
+// N represents the size of the alphabet, with the character's position in the
+// alphabet indicating the digit's value.
 func (rs *rangeSplitter) generateSplits(opts generateSplitsOpts) []string {
 
 	startInteger := opts.minimalIntRange.startInteger
@@ -220,6 +219,8 @@ func (rs *rangeSplitter) isRangeEqualWithPadding(startRange, endRange []rune) bo
 
 	// When the end range is unspecified, it's interpreted as a sequence of the
 	// highest possible characters. Consequently, they are not deemed equal.
+	// If start range has highest possible characters, then smaller characters
+	// are appended to start range to find split points.
 	if len(endRange) == 0 {
 		return false
 	}
diff --git a/storage/dataflux/range_splitter_test.go b/storage/dataflux/range_splitter_test.go
index e5b673700434..934ef0748074 100644
--- a/storage/dataflux/range_splitter_test.go
+++ b/storage/dataflux/range_splitter_test.go
@@ -213,6 +213,14 @@ func TestSplitRange(t *testing.T) {
 			wantErr:         false,
 			wantSplitPoints: []string{"b", "c"},
 		},
+		{
+			desc:            "start range is sequence of highest characters",
+			startRange:      "zzz",
+			endRange:        "",
+			numSplits:       2,
+			wantErr:         false,
+			wantSplitPoints: []string{"zzz5", "zzza"},
+		},
 	}
 
 	for _, tc := range testCases {

From 1cebd05562a323d2be730d3fc5fc95c6ab51b14a Mon Sep 17 00:00:00 2001
From: Akansha Maloo <amaloo@google.com>
Date: Tue, 24 Sep 2024 18:38:19 +0000
Subject: [PATCH 6/9] add walkthrough of an example splitRange

---
 storage/dataflux/range_splitter.go | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/storage/dataflux/range_splitter.go b/storage/dataflux/range_splitter.go
index 737576e5b408..e09900d02e28 100644
--- a/storage/dataflux/range_splitter.go
+++ b/storage/dataflux/range_splitter.go
@@ -80,6 +80,19 @@ func newRangeSplitter(alphabet string) (*rangeSplitter, error) {
 // algorithm struggles to find sufficient split points. If the start range is empty
 // the algorithm assumes it to be sequence of smallest possible character and empty
 // end range as sequence of highest possible characters.
+// For example, sorted alphabet {"a","b","c","d"}
+//	Input: startRange= "d", endRange= "", numSplits=2
+//
+//  This will be converted from base-N to base-10 integers.
+//  While calculating base-10 integer, "a" will be appended to startRange
+//  and "d" will be appended to endRange until the difference between integers it
+//  more than number of splits.
+//  startInteger for "da" = 12, endInteger for "dd" = 15
+//
+//  To splits points will be 13 and 14 in base-10. This will be converted back to
+//	base-N value to
+//  {"db","dc"}
+
 func (rs *rangeSplitter) splitRange(startRange, endRange string, numSplits int) ([]string, error) {
 	// Number of splits has to be at least one, otherwise it is not splittable.
 	if numSplits < 1 {

From 8b0fb5403cbbe69a36d01553d516ac9e7ec078cf Mon Sep 17 00:00:00 2001
From: Akansha Maloo <amaloo@google.com>
Date: Tue, 24 Sep 2024 18:42:54 +0000
Subject: [PATCH 7/9] formatting

---
 storage/dataflux/range_splitter.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/storage/dataflux/range_splitter.go b/storage/dataflux/range_splitter.go
index e09900d02e28..deb066e602ec 100644
--- a/storage/dataflux/range_splitter.go
+++ b/storage/dataflux/range_splitter.go
@@ -80,6 +80,7 @@ func newRangeSplitter(alphabet string) (*rangeSplitter, error) {
 // algorithm struggles to find sufficient split points. If the start range is empty
 // the algorithm assumes it to be sequence of smallest possible character and empty
 // end range as sequence of highest possible characters.
+//
 // For example, sorted alphabet {"a","b","c","d"}
 //	Input: startRange= "d", endRange= "", numSplits=2
 //

From fc4a0b44f8d9ae2f05a4337ae5f8d29c0a8252db Mon Sep 17 00:00:00 2001
From: Akansha Maloo <amaloo@google.com>
Date: Tue, 24 Sep 2024 18:46:45 +0000
Subject: [PATCH 8/9] typo

---
 storage/dataflux/range_splitter.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/storage/dataflux/range_splitter.go b/storage/dataflux/range_splitter.go
index deb066e602ec..5da1aeb54804 100644
--- a/storage/dataflux/range_splitter.go
+++ b/storage/dataflux/range_splitter.go
@@ -90,8 +90,8 @@ func newRangeSplitter(alphabet string) (*rangeSplitter, error) {
 //  more than number of splits.
 //  startInteger for "da" = 12, endInteger for "dd" = 15
 //
-//  To splits points will be 13 and 14 in base-10. This will be converted back to
-//	base-N value to
+//  The split points will be 13 and 14 in base-10. These will be converted back to
+//	base-N values and returned as split points:
 //  {"db","dc"}
 
 func (rs *rangeSplitter) splitRange(startRange, endRange string, numSplits int) ([]string, error) {

From d6e9e6ef36abc5110dc7abfd0fcffddc6100a62e Mon Sep 17 00:00:00 2001
From: Akansha Maloo <amaloo@google.com>
Date: Tue, 24 Sep 2024 18:48:59 +0000
Subject: [PATCH 9/9] typo

---
 storage/dataflux/range_splitter.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/storage/dataflux/range_splitter.go b/storage/dataflux/range_splitter.go
index 5da1aeb54804..4451e00aa48d 100644
--- a/storage/dataflux/range_splitter.go
+++ b/storage/dataflux/range_splitter.go
@@ -81,12 +81,12 @@ func newRangeSplitter(alphabet string) (*rangeSplitter, error) {
 // the algorithm assumes it to be sequence of smallest possible character and empty
 // end range as sequence of highest possible characters.
 //
-// For example, sorted alphabet {"a","b","c","d"}
+//  For example, sorted alphabet = {"a","b","c","d"}
 //	Input: startRange= "d", endRange= "", numSplits=2
 //
 //  This will be converted from base-N to base-10 integers.
 //  While calculating base-10 integer, "a" will be appended to startRange
-//  and "d" will be appended to endRange until the difference between integers it
+//  and "d" will be appended to endRange until the difference between integers is
 //  more than number of splits.
 //  startInteger for "da" = 12, endInteger for "dd" = 15
 //