Skip to content

Commit

Permalink
#139: improves file encoding support
Browse files Browse the repository at this point in the history
  • Loading branch information
rladstaetter committed Sep 27, 2023
1 parent a0ba8ed commit f2d61f9
Show file tree
Hide file tree
Showing 9 changed files with 106 additions and 45 deletions.
35 changes: 35 additions & 0 deletions app/src/main/java/app/logorrr/io/FEncoding.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package app.logorrr.io

import java.nio.file.{Files, Path}

/**
 * Detects the character encoding of a file by looking at its byte-order mark (BOM).
 */
object FEncoding {

  /** Length in bytes of the longest BOM that is recognised here (UTF-8: EF BB BF). */
  private val MaxBomLength = 3

  /**
   * Reads the first bytes of `path` and maps a leading byte-order mark to an encoding.
   *
   * @param path file to inspect; it is opened for reading and closed again before returning
   * @return [[UTF16LE]] for `FF FE`, [[UTF16BE]] for `FE FF`, [[UTF8]] for `EF BB BF`,
   *         [[Unknown]] if the file starts with none of these (including empty files)
   */
  def apply(path: Path): FEncoding = {
    val is = Files.newInputStream(path)
    try {
      val bom = Array.fill[Byte](MaxBomLength)(0)
      // InputStream.read may return fewer bytes than requested even when more are
      // available, so keep reading until the buffer is full or the stream ends.
      var filled = 0
      var endOfStream = false
      while (!endOfStream && filled < bom.length) {
        val count = is.read(bom, filled, bom.length - filled)
        if (count < 0) endOfStream = true else filled += count
      }
      // only compare against bytes that were really read from the file
      val head = bom.take(filled)
      if (head.startsWith(Array(0xFF.toByte, 0xFE.toByte))) {
        UTF16LE
      } else if (head.startsWith(Array(0xFE.toByte, 0xFF.toByte))) {
        UTF16BE
      } else if (head.startsWith(Array(0xEF.toByte, 0xBB.toByte, 0xBF.toByte))) {
        UTF8
      } else {
        Unknown
      }
    } finally {
      is.close()
    }
  }
}

/**
 * A file encoding as detected by [[FEncoding.apply]].
 *
 * @param asString name of the encoding; for the detected encodings this is the
 *                 charset name handed to the reader that decodes the file
 */
class FEncoding(val asString: String)

/** File starts with the UTF-8 byte-order mark. */
case object UTF8 extends FEncoding("UTF-8")

/** File starts with the UTF-16 little-endian byte-order mark. */
case object UTF16LE extends FEncoding("UTF-16LE")

/** File starts with the UTF-16 big-endian byte-order mark. */
case object UTF16BE extends FEncoding("UTF-16BE")

/** No known byte-order mark was found; callers have to pick a fallback charset. */
case object Unknown extends FEncoding("Unknown")
31 changes: 31 additions & 0 deletions app/src/main/java/app/logorrr/io/FileManager.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package app.logorrr.io

import app.logorrr.util.CanLog

import java.io.{BufferedReader, FileInputStream, InputStreamReader}
import java.nio.file.{Files, Path}


/**
 * Reads text files line by line, choosing the charset from the file's byte-order mark.
 */
object FileManager extends CanLog {

  /** The byte-order mark as it shows up in decoded text (U+FEFF). */
  private val ByteOrderMark = '\uFEFF'

  /**
   * Opens `path` with the encoding reported by [[FEncoding]]; if no byte-order mark
   * was found a warning is logged and UTF-8 is used instead.
   *
   * The caller is responsible for closing the returned reader.
   */
  private def openFileWithDetectedEncoding(path: Path): BufferedReader = {
    val encoding = FEncoding(path)
    val charsetName =
      if (encoding == Unknown) {
        logWarn(encoding.asString + " encoding - fallback to UTF-8")
        UTF8.asString
      } else {
        encoding.asString
      }
    new BufferedReader(new InputStreamReader(new FileInputStream(path.toFile), charsetName))
  }

  /**
   * Reads all lines of the given file.
   *
   * @param path an existing file
   * @return the lines of the file in order, without line terminators and without a
   *         leading byte-order mark
   * @throws IllegalArgumentException if `path` does not exist
   */
  def fromPath(path: Path): Seq[String] = {
    require(Files.exists(path))
    val reader = openFileWithDetectedEncoding(path)
    try {
      // toVector forces the whole file to be read here, while the reader is still open;
      // Iterator#toSeq may be lazy depending on the Scala version.
      val lines = Iterator.continually(reader.readLine()).takeWhile(_ != null).toVector
      // The UTF-8 / UTF-16LE / UTF-16BE decoders do not consume the byte-order mark,
      // they hand it through as U+FEFF in front of the first line - drop it.
      lines.headOption match {
        case Some(first) if first.nonEmpty && first.charAt(0) == ByteOrderMark =>
          lines.updated(0, first.substring(1))
        case _ => lines
      }
    } finally {
      reader.close()
    }
  }

}
2 changes: 1 addition & 1 deletion app/src/main/scala/app/logorrr/conf/LogoRRRGlobals.scala
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ object LogoRRRGlobals extends CanLog {
case x => x
})

if (OsUtil.isMac) {
if (OsUtil.isMac && !OsUtil.inTest) {
OsxBridge.releasePath(pathAsString)
}

Expand Down
14 changes: 6 additions & 8 deletions app/src/main/scala/app/logorrr/model/LogEntryFileReader.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ import javafx.scene.paint.Color

import java.nio.file.Path
import java.time.Instant
import java.util.stream
import java.util.stream.Collectors
import java.util

/** Abstraction for a log file */
object LogEntryFileReader extends CanLog {
Expand All @@ -17,20 +16,19 @@ object LogEntryFileReader extends CanLog {
, parseEntryForTimeInstant: String => Option[Instant])
(logFilePath: Path): ObservableList[LogEntry] = timeR({
var lineNumber: Int = 0
// trying to be very clever and use java stream instead of scala collections
// it makes a notable difference in performance if we don't convert huge lists from java <-> scala
val logEntryStream: stream.Stream[LogEntry] = LogFileReader.readFromFile(logFilePath).stream().map(l => {
val arraylist = new util.ArrayList[LogEntry]()
LogFileReader.readFromFile(logFilePath).map(l => {
lineNumber = lineNumber + 1
LogEntry(lineNumber, parseColor(l), l, parseEntryForTimeInstant(l))
arraylist.add(LogEntry(lineNumber, parseColor(l), l, parseEntryForTimeInstant(l)))
})
FXCollections.observableList(logEntryStream.collect(Collectors.toList[LogEntry]()))
FXCollections.observableList(arraylist)
}, s"Imported ${logFilePath.toAbsolutePath.toString} ... ")

def from(logFilePath: Path, filters: Seq[Fltr], logEntryTimeFormat: LogEntryInstantFormat): ObservableList[LogEntry] = {
mkLogEntryList(l => Filter.calcColor(l, filters), l => LogEntryInstantFormat.parseInstant(l, logEntryTimeFormat))(logFilePath)
}

def from(logFilePath: Path, filters: Seq[Fltr]): ObservableList[LogEntry] = mkLogEntryList(l => Filter.calcColor(l, filters), _ => None)(logFilePath)
def from(logFile: Path, filters: Seq[Fltr]): ObservableList[LogEntry] = mkLogEntryList(l => Filter.calcColor(l, filters), _ => None)(logFile)


}
Expand Down
46 changes: 11 additions & 35 deletions app/src/main/scala/app/logorrr/model/LogFileReader.scala
Original file line number Diff line number Diff line change
@@ -1,52 +1,28 @@
package app.logorrr.model

import app.logorrr.OsxBridge
import app.logorrr.io.FileManager
import app.logorrr.util.{CanLog, OsUtil}

import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Path}
import java.util
import scala.util.{Failure, Success, Try}
import java.nio.file.Path

object LogFileReader extends CanLog {

def readFromFile(logFile: Path): util.List[String] = {
Try {
if (OsUtil.isMac) {
logInfo("Registering security bookmark for " + logFile.toAbsolutePath.toString)
OsxBridge.registerPath(logFile.toAbsolutePath.toString)
}

val lines = Files.readAllLines(logFile)

logEmptyLogFile(logFile, lines)
lines
} match {
case Failure(exception) =>
val msg = s"Failed to read ${logFile.toAbsolutePath.toString}, exception: ${exception.getMessage}, retrying ISO_8859_1 ..."
logException(msg, exception)
Try {
val lines = Files.readAllLines(logFile, StandardCharsets.ISO_8859_1)
logEmptyLogFile(logFile, lines)
lines
} match {
case Failure(exception) =>
val msg = s"Could not read file ${logFile.toAbsolutePath.toString} properly. Reason: ${exception.getMessage}."
logException(msg, exception)
util.Arrays.asList(msg)
case Success(value) =>
value
}
case Success(lines) => lines
def readFromFile(logFile: Path): Seq[String] = {
if (OsUtil.isMac && !OsUtil.inTest) {
logInfo("Registering security bookmark for " + logFile.toAbsolutePath.toString)
OsxBridge.registerPath(logFile.toAbsolutePath.toString)
}
val lines = FileManager.fromPath(logFile)
logEmptyLogFile(logFile, lines)
lines
}


private def logEmptyLogFile(logFile: Path, lines: util.List[String]): Unit = {
private def logEmptyLogFile(logFile: Path, lines: Seq[String]): Unit = {
if (lines.isEmpty) {
logWarn(s"${logFile.toAbsolutePath.toString} was empty.")
} else {
logTrace(s"${logFile.toAbsolutePath.toString} has ${lines.size()} lines.")
logTrace(s"${logFile.toAbsolutePath.toString} has ${lines.size} lines.")
}
}
}
1 change: 0 additions & 1 deletion app/src/main/scala/app/logorrr/views/search/Filter.scala
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ object Filter {
* @param pattern text to search for
* @param color associated color
*/
// TODO write encoder for pureconfig for color, see https://github.com/rladstaetter/LogoRRR/issues/105
class Filter(val pattern: String, val color: Color) extends Fltr {

override def applyMatch(searchTerm: String): Boolean = searchTerm.contains(pattern)
Expand Down
Binary file added app/src/test/resources/app/logorrr/issue-139.log
Binary file not shown.
20 changes: 20 additions & 0 deletions app/src/test/scala/app/logorrr/Issue139Spec.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package app.logorrr

import app.logorrr.io.FileManager
import org.scalatest.wordspec.AnyWordSpec

import java.nio.file.{Files, Paths}



/**
 * Regression spec for issue #139: checks that the bundled log file (UTF-16 encoded,
 * going by the test name) is present and can be read via [[FileManager.fromPath]].
 */
class Issue139Spec extends AnyWordSpec {

  "Logfile" when {
    "encodedInUtf16" should {
      // alternative fixture, kept for manual comparison:
      //val p = Paths.get("src/test/resources/app/logorrr/util/orig.log")
      val logFile = Paths.get("src/test/resources/app/logorrr/issue-139.log")

      "exist" in {
        assert(Files.exists(logFile))
      }

      "can read file" in {
        val lines = FileManager.fromPath(logFile)
        assert(lines.nonEmpty)
      }
    }
  }
}
2 changes: 2 additions & 0 deletions core/src/main/scala/app/logorrr/util/OsUtil.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ package app.logorrr.util
*/
object OsUtil {

val inTest = false

sealed trait Os

case object Windows extends Os
Expand Down

0 comments on commit f2d61f9

Please sign in to comment.