Skip to content

Commit

Permalink
add BOM detection, and language guessing, and add multi-byte character counts to the profiler
Browse files Browse the repository at this point in the history
  • Loading branch information
scholarsmate committed Dec 7, 2023
1 parent d9f8c46 commit 75bf9d5
Show file tree
Hide file tree
Showing 9 changed files with 178 additions and 46 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
"@vscode/debugadapter": "1.63.0",
"await-notify": "1.0.1",
"hexy": "0.3.5",
"iso-639-1": "^3.1.0",
"jsonc-parser": "3.2.0",
"semver": "7.5.4",
"unzip-stream": "0.3.1",
Expand Down
100 changes: 76 additions & 24 deletions src/dataEditor/dataEditorClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import {
ALL_EVENTS,
beginSessionTransaction,
clear,
countCharacters,
CountKind,
createSession,
createSimpleFileLogger,
Expand All @@ -30,11 +31,13 @@ import {
EditorClient,
endSessionTransaction,
EventSubscriptionRequest,
getByteOrderMark,
getClient,
getClientVersion,
getComputedFileSize,
getContentType,
getCounts,
getLanguage,
getLogger,
getServerHeartbeat,
getServerInfo,
Expand Down Expand Up @@ -167,8 +170,6 @@ export class DataEditorClient implements vscode.Disposable {
private currentViewportId: string
private fileToEdit: string = ''
private omegaSessionId = ''
private contentType = ''
private fileSize = 0
private sendHeartbeatIntervalId: NodeJS.Timeout | number | undefined =
undefined

Expand All @@ -191,8 +192,6 @@ export class DataEditorClient implements vscode.Disposable {
this.svelteWebviewInitializer = new SvelteWebviewInitializer(context)
this.svelteWebviewInitializer.initialize(this.view, this.panel.webview)
this.currentViewportId = ''
this.contentType = ''
this.fileSize = 0
this.fileToEdit = fileToEdit
this.displayState = new DisplayState(this.panel)
}
Expand Down Expand Up @@ -241,6 +240,17 @@ export class DataEditorClient implements vscode.Disposable {
'checkpointPath is not set'
)

let data = {
byteOrderMark: '',
changeCount: 0,
computedFileSize: 0,
diskFileSize: 0,
fileName: this.fileToEdit,
language: '',
type: '',
undoCount: 0,
}

// create a session and capture the session id, content type, and file size
try {
const createSessionResponse = await createSession(
Expand All @@ -252,17 +262,39 @@ export class DataEditorClient implements vscode.Disposable {
assert(this.omegaSessionId.length > 0, 'omegaSessionId is not set')
addActiveSession(this.omegaSessionId)

this.fileSize = createSessionResponse.hasFileSize()
? (createSessionResponse.getFileSize() as number)
: 0
data.diskFileSize = data.computedFileSize =
createSessionResponse.hasFileSize()
? (createSessionResponse.getFileSize() as number)
: 0

const contentTypeResponse = await getContentType(
this.omegaSessionId,
0,
Math.min(1024, this.fileSize)
Math.min(1024, data.computedFileSize)
)
this.contentType = contentTypeResponse.getContentType()
assert(this.contentType.length > 0, 'contentType is not set')
data.type = contentTypeResponse.getContentType()
assert(data.type.length > 0, 'contentType is not set')

const byteOrderMarkResponse = await getByteOrderMark(
this.omegaSessionId,
0
)
data.byteOrderMark = byteOrderMarkResponse.getByteOrderMark()
assert(data.byteOrderMark.length > 0, 'byteOrderMark is not set')

const languageResponse = await getLanguage(
this.omegaSessionId,
0,
Math.min(1024, data.computedFileSize),
data.byteOrderMark
)
data.language = languageResponse.getLanguage()
assert(data.language.length > 0, 'language is not set')

data.diskFileSize = data.computedFileSize =
createSessionResponse.hasFileSize()
? (createSessionResponse.getFileSize() as number)
: 0
} catch {
const msg = `Failed to create session for ${this.fileToEdit}`
getLogger().error({
Expand Down Expand Up @@ -301,14 +333,7 @@ export class DataEditorClient implements vscode.Disposable {
// send the initial file info to the webview
await this.panel.webview.postMessage({
command: MessageCommand.fileInfo,
data: {
changeCount: 0,
computedFileSize: this.fileSize,
diskFileSize: this.fileSize,
fileName: this.fileToEdit,
type: this.contentType,
undoCount: 0,
},
data: data,
})
}

Expand Down Expand Up @@ -450,13 +475,40 @@ export class DataEditorClient implements vscode.Disposable {
startOffset,
length
)
const characterCount = await countCharacters(
this.omegaSessionId,
startOffset,
length
)
const contentTypeResponse = await getContentType(
this.omegaSessionId,
startOffset,
length
)
const languageResponse = await getLanguage(
this.omegaSessionId,
startOffset,
length,
characterCount.getByteOrderMark()
)
await this.panel.webview.postMessage({
command: MessageCommand.profile,
data: {
startOffset: startOffset,
length: length,
byteProfile: byteProfile,
numAscii: numAscii(byteProfile),
language: languageResponse.getLanguage(),
contentType: contentTypeResponse.getContentType(),
characterCount: {
byteOrderMark: characterCount.getByteOrderMark(),
byteOrderMarkBytes: characterCount.getByteOrderMarkBytes(),
singleByteCount: characterCount.getSingleByteChars(),
doubleByteCount: characterCount.getDoubleByteChars(),
tripleByteCount: characterCount.getTripleByteChars(),
quadByteCount: characterCount.getQuadByteChars(),
invalidBytes: characterCount.getInvalidBytes(),
},
},
})
}
Expand Down Expand Up @@ -714,16 +766,16 @@ export class DataEditorClient implements vscode.Disposable {

if (saved) {
this.fileToEdit = fileToSave
this.fileSize = await getComputedFileSize(this.omegaSessionId)
const fileSize = await getComputedFileSize(this.omegaSessionId)
await this.panel.webview.postMessage({
command: MessageCommand.fileInfo,
data: {
computedFileSize: this.fileSize,
diskFileSize: this.fileSize,
fileName: this.fileToEdit,
computedFileSize: fileSize,
diskFileSize: fileSize,
fileName: fileToSave,
},
})
vscode.window.showInformationMessage(`Saved: ${this.fileToEdit}`)
vscode.window.showInformationMessage(`Saved: ${fileToSave}`)
} else if (cancelled) {
vscode.window.showInformationMessage(`Cancelled save: ${fileToSave}`)
} else {
Expand Down Expand Up @@ -1269,7 +1321,7 @@ async function serverStart() {
getPidFile(omegaEditPort),
logConfigFile
),
new Promise((resolve, reject) => {
new Promise((_resolve, reject) => {
setTimeout(() => {
reject((): Error => {
return new Error(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -497,8 +497,8 @@ limitations under the License.
})
</script>

{#if $selectionDataStore.active && $editMode == EditByteModes.Single}
{#key $selectedByte || selectedByteElement || dataRadix || $editorActionsAllowed == EditActionRestrictions.None}
{#if $selectionDataStore.active && $editMode === EditByteModes.Single}
{#key $selectedByte || selectedByteElement || dataRadix || $editorActionsAllowed === EditActionRestrictions.None}
<SelectedByteEdit
byte={$selectedByte}
on:seek
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ limitations under the License.
}
</script>

{#if $editorActionsAllowed == EditActionRestrictions.None}
{#if $editorActionsAllowed === EditActionRestrictions.None}
<span>
<input
class="insert {themeClass}"
Expand Down
80 changes: 62 additions & 18 deletions src/svelte/src/components/DataMetrics/DataMetrics.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,13 @@ limitations under the License.
-->
<script lang="ts">
import Button from '../Inputs/Buttons/Button.svelte'
import { vscode } from '../../utilities/vscode'
import { MessageCommand } from '../../utilities/message'
import { onMount } from 'svelte'
import {vscode} from '../../utilities/vscode'
import {MessageCommand} from '../../utilities/message'
import {onMount} from 'svelte'
import Input from '../Inputs/Input/Input.svelte'
import { viewport } from '../../stores'
import { DATA_PROFILE_MAX_LENGTH } from '../../stores/configuration'
import { addressRadix } from '../../stores'
import { radixToString, regexEditDataTest } from '../../utilities/display'
import {addressRadix, viewport} from '../../stores'
import {DATA_PROFILE_MAX_LENGTH} from '../../stores/configuration'
import {radixToString, regexEditDataTest} from '../../utilities/display'
const PROFILE_DOS_EOL = 256
Expand All @@ -36,8 +35,20 @@ limitations under the License.
// number of bytes to profile from the start offset
export let length: number
/**
 * Character-count figures shown in the "char-count" section of this component.
 *
 * Every field starts at an empty/zero value and is overwritten from the
 * `characterCount` object of an incoming profile message.
 */
class CharacterCountData {
  /** Byte order mark reported for the profiled range ('' until a profile arrives). */
  byteOrderMark = ''
  /** Value of `byteOrderMarkBytes` from the profile (presumably the BOM's size in bytes; confirm against the server API). */
  byteOrderMarkBytes = 0
  /** Count of single-byte characters. */
  singleByteCount = 0
  /** Count of double-byte characters. */
  doubleByteCount = 0
  /** Count of triple-byte characters. */
  tripleByteCount = 0
  /** Count of quad-byte characters. */
  quadByteCount = 0
  /** Count of invalid bytes. */
  invalidBytes = 0
}
let endOffset: number = 0
let byteProfile: number[] = []
let language: string = ''
let contentType: string = ''
let currentTooltip: { index: number; value: number } | null = null
let colorScaleData: string[] = []
let scaledData: number[] = []
Expand All @@ -47,6 +58,7 @@ limitations under the License.
let mean: number = 0
let variance: number = 0
let stdDev: number = 0
let characterCountData: CharacterCountData = new CharacterCountData()
let numAscii: number = 0
let numDistinct: number = 0
let fieldBeingEdited: string = ''
Expand Down Expand Up @@ -267,6 +279,18 @@ limitations under the License.
case MessageCommand.profile:
numAscii = msg.data.data.numAscii as number
byteProfile = msg.data.data.byteProfile as number[]
language = msg.data.data.language as string
contentType = msg.data.data.contentType as string
// character count data
characterCountData.byteOrderMark = msg.data.data.characterCount.byteOrderMark as string
characterCountData.byteOrderMarkBytes = msg.data.data.characterCount.byteOrderMarkBytes as number
characterCountData.singleByteCount = msg.data.data.characterCount.singleByteCount as number
characterCountData.doubleByteCount = msg.data.data.characterCount.doubleByteCount as number
characterCountData.tripleByteCount = msg.data.data.characterCount.tripleByteCount as number
characterCountData.quadByteCount = msg.data.data.characterCount.quadByteCount as number
characterCountData.invalidBytes = msg.data.data.characterCount.invalidBytes as number
setStatusMessage(
`Profiled bytes from ${startOffset} to ${startOffset + length}`
)
Expand Down Expand Up @@ -447,60 +471,80 @@ limitations under the License.
<hr />
<div class="stats">
<label for="computed-size"
>&nbsp;Max Offset: <span id="computed-size" class="nowrap"
>&nbsp;&nbsp;Max Offset: <span id="computed-size" class="nowrap"
>{viewport.offsetMax.toString($addressRadix)} ({radixToString(
$addressRadix
)})</span
></label
>
<label for="language"
>&nbsp;&nbsp;&nbsp;&nbsp;Language: <span id="language" class="nowrap"
>{language}</span
></label
>
<label for="content-type"
>Content Type: <span id="content-type" class="nowrap"
>{contentType}</span
></label
>
<label for="min-frequency"
>&nbsp;&nbsp;Min Freq.: <span id="min-frequency" class="nowrap"
>&nbsp;&nbsp;&nbsp;Min Freq.: <span id="min-frequency" class="nowrap"
>{minFrequency}</span
></label
>
<label for="max-frequency"
>&nbsp;&nbsp;Max Freq.: <span id="max-frequency" class="nowrap"
>&nbsp;&nbsp;&nbsp;Max Freq.: <span id="max-frequency" class="nowrap"
>{maxFrequency}</span
></label
>
<label for="mean-frequency"
>&nbsp;Mean Freq.: <span id="mean-frequency" class="nowrap"
>&nbsp;&nbsp;Mean Freq.: <span id="mean-frequency" class="nowrap"
>{mean.toFixed(2)}</span
></label
>
<label for="variance"
>&nbsp;&nbsp;&nbsp;Variance: <span id="variance" class="nowrap"
>&nbsp;&nbsp;&nbsp;&nbsp;Variance: <span id="variance" class="nowrap"
>{variance.toFixed(2)}</span
></label
>
<label for="stddev"
>&nbsp;&nbsp;Std. Dev.: <span id="stddev" class="nowrap"
>&nbsp;&nbsp;&nbsp;Std. Dev.: <span id="stddev" class="nowrap"
>{stdDev.toFixed(2)}</span
></label
>
<label for="byte-count"
>&nbsp;Byte Count: <span id="byte-count" class="nowrap">{sum}</span
>&nbsp;&nbsp;Byte Count: <span id="byte-count" class="nowrap">{sum}</span
></label
>
<label for="distinct-count"
>&nbsp;&nbsp;&nbsp;Distinct: <span id="distinct-count" class="nowrap"
>&nbsp;&nbsp;&nbsp;&nbsp;Distinct: <span id="distinct-count" class="nowrap"
>{numDistinct}</span
></label
>
<label for="dos_eol-count"
>&nbsp;&nbsp;&nbsp;&nbsp;DOS EOL: <span id="dos_eol-count" class="nowrap"
>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;DOS EOL: <span id="dos_eol-count" class="nowrap"
>{byteProfile[PROFILE_DOS_EOL]}</span
></label
>
<label for="ascii-count"
>ASCII Count: <span id="ascii-count" class="nowrap">{numAscii}</span
>&nbsp;ASCII Count: <span id="ascii-count" class="nowrap">{numAscii}</span
></label
>
<label for="ascii-percent"
>&nbsp;&nbsp;&nbsp;&nbsp;% ASCII: <span id="ascii-percent" class="nowrap"
>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;% ASCII: <span id="ascii-percent" class="nowrap"
>{((numAscii / sum) * 100).toFixed(2)}</span
>
</label>
</div>
<hr />
<div class="char-count">
<label for="char-count-bom">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;BOM: <span id="char-count-bom" class="nowrap">{characterCountData.byteOrderMark}</span></label>
<label for="char-count-bom-bytes">&nbsp;&nbsp;BOM Bytes: <span id="char-count-bom-bytes" class="nowrap">{characterCountData.byteOrderMarkBytes}</span></label>
<label for="char-count-single">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Single: <span id="char-count-single" class="nowrap">{characterCountData.singleByteCount}</span></label>
<label for="char-count-double">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Double: <span id="char-count-double" class="nowrap">{characterCountData.doubleByteCount}</span></label>
<label for="char-count-triple">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Triple: <span id="char-count-triple" class="nowrap">{characterCountData.tripleByteCount}</span></label>
<label for="char-count-quad">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Quad: <span id="char-count-quad" class="nowrap">{characterCountData.quadByteCount}</span></label>
<label for="char-count-invalid">&nbsp;&nbsp;&nbsp;&nbsp;Invalid: <span id="char-count-invalid" class="nowrap">{characterCountData.invalidBytes}</span></label>
</div>
<hr />
<Button fn={handleCsvProfileDownload} description="Download profiled data as .csv">
Expand Down
Loading

0 comments on commit 75bf9d5

Please sign in to comment.