Skip to content

Commit

Permalink
Fix performance regression in from_json (#10306)
Browse files (browse the repository at this point in the history)
* experimental fix

* experimental fix

* close resource

* fix resource leak

* signoff

Signed-off-by: Andy Grove <[email protected]>

---------

Signed-off-by: Andy Grove <[email protected]>
  • Loading branch information
andygrove authored Jan 29, 2024
1 parent 1fb6361 commit bc22bf8
Showing 1 changed file with 13 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -78,19 +78,23 @@ case class GpuJsonToStructs(
withResource(isLiteralNull.ifElse(emptyRow, nullsReplaced)) { cleaned =>
checkForNewline(cleaned, "\n", "line separator")
checkForNewline(cleaned, "\r", "carriage return")
// if the last entry in a column is incomplete or invalid, then cuDF
// will drop the row rather than replace with null if there is no newline, so we
// add a newline here to prevent that
val joined = withResource(cudf.Scalar.fromString("\n")) { lineSep =>
cleaned.joinStrings(lineSep, emptyRow)

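// add a newline to each JSON line; without a trailing newline cuDF drops an
// incomplete or invalid last entry instead of replacing it with null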
val withNewline = withResource(cudf.Scalar.fromString("\n")) { lineSep =>
withResource(ColumnVector.fromScalar(lineSep, cleaned.getRowCount.toInt)) {
newLineCol =>
ColumnVector.stringConcatenate(Array[ColumnView](cleaned, newLineCol))
}
}
val concat = withResource(joined) { _ =>
withResource(ColumnVector.fromStrings("\n")) { newline =>
ColumnVector.stringConcatenate(Array[ColumnView](joined, newline))

// join all the JSON lines into one string
val joined = withResource(withNewline) { _ =>
withResource(Scalar.fromString("")) { emptyString =>
withNewline.joinStrings(emptyString, emptyRow)
}
}

(isNullOrEmptyInput, concat)
(isNullOrEmptyInput, joined)
}
}
}
Expand Down

0 comments on commit bc22bf8

Please sign in to comment.