Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add BOM detection and language guessing, and add multi-byte character counts to the profiler
Browse files Browse the repository at this point in the history
scholarsmate committed Dec 7, 2023
1 parent d9f8c46 commit 9e6be18
Showing 9 changed files with 179 additions and 46 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
@@ -50,6 +50,7 @@
"@vscode/debugadapter": "1.63.0",
"await-notify": "1.0.1",
"hexy": "0.3.5",
"iso-639-1": "^3.1.0",
"jsonc-parser": "3.2.0",
"semver": "7.5.4",
"unzip-stream": "0.3.1",
100 changes: 76 additions & 24 deletions src/dataEditor/dataEditorClient.ts
Original file line number Diff line number Diff line change
@@ -20,6 +20,7 @@ import {
ALL_EVENTS,
beginSessionTransaction,
clear,
countCharacters,
CountKind,
createSession,
createSimpleFileLogger,
@@ -30,11 +31,13 @@ import {
EditorClient,
endSessionTransaction,
EventSubscriptionRequest,
getByteOrderMark,
getClient,
getClientVersion,
getComputedFileSize,
getContentType,
getCounts,
getLanguage,
getLogger,
getServerHeartbeat,
getServerInfo,
@@ -167,8 +170,6 @@ export class DataEditorClient implements vscode.Disposable {
private currentViewportId: string
private fileToEdit: string = ''
private omegaSessionId = ''
private contentType = ''
private fileSize = 0
private sendHeartbeatIntervalId: NodeJS.Timeout | number | undefined =
undefined

@@ -191,8 +192,6 @@ export class DataEditorClient implements vscode.Disposable {
this.svelteWebviewInitializer = new SvelteWebviewInitializer(context)
this.svelteWebviewInitializer.initialize(this.view, this.panel.webview)
this.currentViewportId = ''
this.contentType = ''
this.fileSize = 0
this.fileToEdit = fileToEdit
this.displayState = new DisplayState(this.panel)
}
@@ -241,6 +240,17 @@ export class DataEditorClient implements vscode.Disposable {
'checkpointPath is not set'
)

let data = {
byteOrderMark: '',
changeCount: 0,
computedFileSize: 0,
diskFileSize: 0,
fileName: this.fileToEdit,
language: '',
type: '',
undoCount: 0,
}

// create a session and capture the session id, file size, content type, byte order mark, and language
try {
const createSessionResponse = await createSession(
@@ -252,17 +262,39 @@ export class DataEditorClient implements vscode.Disposable {
assert(this.omegaSessionId.length > 0, 'omegaSessionId is not set')
addActiveSession(this.omegaSessionId)

this.fileSize = createSessionResponse.hasFileSize()
? (createSessionResponse.getFileSize() as number)
: 0
data.diskFileSize = data.computedFileSize =
createSessionResponse.hasFileSize()
? (createSessionResponse.getFileSize() as number)
: 0

const contentTypeResponse = await getContentType(
this.omegaSessionId,
0,
Math.min(1024, this.fileSize)
Math.min(1024, data.computedFileSize)
)
this.contentType = contentTypeResponse.getContentType()
assert(this.contentType.length > 0, 'contentType is not set')
data.type = contentTypeResponse.getContentType()
assert(data.type.length > 0, 'contentType is not set')

const byteOrderMarkResponse = await getByteOrderMark(
this.omegaSessionId,
0
)
data.byteOrderMark = byteOrderMarkResponse.getByteOrderMark()
assert(data.byteOrderMark.length > 0, 'byteOrderMark is not set')

const languageResponse = await getLanguage(
this.omegaSessionId,
0,
Math.min(1024, data.computedFileSize),
data.byteOrderMark
)
data.language = languageResponse.getLanguage()
assert(data.language.length > 0, 'language is not set')

data.diskFileSize = data.computedFileSize =
createSessionResponse.hasFileSize()
? (createSessionResponse.getFileSize() as number)
: 0
} catch {
const msg = `Failed to create session for ${this.fileToEdit}`
getLogger().error({
@@ -301,14 +333,7 @@ export class DataEditorClient implements vscode.Disposable {
// send the initial file info to the webview
await this.panel.webview.postMessage({
command: MessageCommand.fileInfo,
data: {
changeCount: 0,
computedFileSize: this.fileSize,
diskFileSize: this.fileSize,
fileName: this.fileToEdit,
type: this.contentType,
undoCount: 0,
},
data: data,
})
}

@@ -450,13 +475,40 @@ export class DataEditorClient implements vscode.Disposable {
startOffset,
length
)
const characterCount = await countCharacters(
this.omegaSessionId,
startOffset,
length
)
const contentTypeResponse = await getContentType(
this.omegaSessionId,
startOffset,
length
)
const languageResponse = await getLanguage(
this.omegaSessionId,
startOffset,
length,
characterCount.getByteOrderMark()
)
await this.panel.webview.postMessage({
command: MessageCommand.profile,
data: {
startOffset: startOffset,
length: length,
byteProfile: byteProfile,
numAscii: numAscii(byteProfile),
language: languageResponse.getLanguage(),
contentType: contentTypeResponse.getContentType(),
characterCount: {
byteOrderMark: characterCount.getByteOrderMark(),
byteOrderMarkBytes: characterCount.getByteOrderMarkBytes(),
singleByteCount: characterCount.getSingleByteChars(),
doubleByteCount: characterCount.getDoubleByteChars(),
tripleByteCount: characterCount.getTripleByteChars(),
quadByteCount: characterCount.getQuadByteChars(),
invalidBytes: characterCount.getInvalidBytes(),
},
},
})
}
@@ -714,16 +766,16 @@ export class DataEditorClient implements vscode.Disposable {

if (saved) {
this.fileToEdit = fileToSave
this.fileSize = await getComputedFileSize(this.omegaSessionId)
const fileSize = await getComputedFileSize(this.omegaSessionId)
await this.panel.webview.postMessage({
command: MessageCommand.fileInfo,
data: {
computedFileSize: this.fileSize,
diskFileSize: this.fileSize,
fileName: this.fileToEdit,
computedFileSize: fileSize,
diskFileSize: fileSize,
fileName: fileToSave,
},
})
vscode.window.showInformationMessage(`Saved: ${this.fileToEdit}`)
vscode.window.showInformationMessage(`Saved: ${fileToSave}`)
} else if (cancelled) {
vscode.window.showInformationMessage(`Cancelled save: ${fileToSave}`)
} else {
@@ -1269,7 +1321,7 @@ async function serverStart() {
getPidFile(omegaEditPort),
logConfigFile
),
new Promise((resolve, reject) => {
new Promise((_resolve, reject) => {
setTimeout(() => {
reject((): Error => {
return new Error(
Original file line number Diff line number Diff line change
@@ -497,8 +497,8 @@ limitations under the License.
})
</script>

{#if $selectionDataStore.active && $editMode == EditByteModes.Single}
{#key $selectedByte || selectedByteElement || dataRadix || $editorActionsAllowed == EditActionRestrictions.None}
{#if $selectionDataStore.active && $editMode === EditByteModes.Single}
{#key $selectedByte || selectedByteElement || dataRadix || $editorActionsAllowed === EditActionRestrictions.None}
<SelectedByteEdit
byte={$selectedByte}
on:seek
Original file line number Diff line number Diff line change
@@ -324,7 +324,7 @@ limitations under the License.
}
</script>

{#if $editorActionsAllowed == EditActionRestrictions.None}
{#if $editorActionsAllowed === EditActionRestrictions.None}
<span>
<input
class="insert {themeClass}"
81 changes: 63 additions & 18 deletions src/svelte/src/components/DataMetrics/DataMetrics.svelte
Original file line number Diff line number Diff line change
@@ -16,14 +16,15 @@ limitations under the License.
-->
<script lang="ts">
import Button from '../Inputs/Buttons/Button.svelte'
import { vscode } from '../../utilities/vscode'
import { MessageCommand } from '../../utilities/message'
import { onMount } from 'svelte'
import {vscode} from '../../utilities/vscode'
import {MessageCommand} from '../../utilities/message'
import {onMount} from 'svelte'
import Input from '../Inputs/Input/Input.svelte'
import { viewport } from '../../stores'
import { DATA_PROFILE_MAX_LENGTH } from '../../stores/configuration'
import { addressRadix } from '../../stores'
import { radixToString, regexEditDataTest } from '../../utilities/display'
import {addressRadix, viewport} from '../../stores'
import {DATA_PROFILE_MAX_LENGTH} from '../../stores/configuration'
import {radixToString, regexEditDataTest} from '../../utilities/display'
import ISO6391 from "iso-639-1";
import Tooltip from "src/components/layouts/Tooltip.svelte";
const PROFILE_DOS_EOL = 256
@@ -36,8 +37,20 @@ limitations under the License.
// number of bytes to profile from the start offset
export let length: number
class CharacterCountData {
byteOrderMark: string = ''
byteOrderMarkBytes: number = 0
singleByteCount: number = 0
doubleByteCount: number = 0
tripleByteCount: number = 0
quadByteCount: number = 0
invalidBytes: number = 0
}
let endOffset: number = 0
let byteProfile: number[] = []
let language: string = ''
let contentType: string = ''
let currentTooltip: { index: number; value: number } | null = null
let colorScaleData: string[] = []
let scaledData: number[] = []
@@ -47,6 +60,7 @@ limitations under the License.
let mean: number = 0
let variance: number = 0
let stdDev: number = 0
let characterCountData: CharacterCountData = new CharacterCountData()
let numAscii: number = 0
let numDistinct: number = 0
let fieldBeingEdited: string = ''
@@ -267,6 +281,18 @@ limitations under the License.
case MessageCommand.profile:
numAscii = msg.data.data.numAscii as number
byteProfile = msg.data.data.byteProfile as number[]
language = msg.data.data.language as string
contentType = msg.data.data.contentType as string
// character count data
characterCountData.byteOrderMark = msg.data.data.characterCount.byteOrderMark as string
characterCountData.byteOrderMarkBytes = msg.data.data.characterCount.byteOrderMarkBytes as number
characterCountData.singleByteCount = msg.data.data.characterCount.singleByteCount as number
characterCountData.doubleByteCount = msg.data.data.characterCount.doubleByteCount as number
characterCountData.tripleByteCount = msg.data.data.characterCount.tripleByteCount as number
characterCountData.quadByteCount = msg.data.data.characterCount.quadByteCount as number
characterCountData.invalidBytes = msg.data.data.characterCount.invalidBytes as number
setStatusMessage(
`Profiled bytes from ${startOffset} to ${startOffset + length}`
)
@@ -447,60 +473,79 @@ limitations under the License.
<hr />
<div class="stats">
<label for="computed-size"
>&nbsp;Max Offset: <span id="computed-size" class="nowrap"
>&nbsp;&nbsp;Max Offset: <span id="computed-size" class="nowrap"
>{viewport.offsetMax.toString($addressRadix)} ({radixToString(
$addressRadix
)})</span
></label
>
<label for="language">&nbsp;&nbsp;&nbsp;&nbsp;Language:<Tooltip
description="{ISO6391.getName(language)}"
alwaysEnabled={true}><span id="language" class="nowrap"
>{language}</span></Tooltip></label>
<label for="content-type"
>Content Type: <span id="content-type" class="nowrap"
>{contentType}</span
></label
>
<label for="min-frequency"
>&nbsp;&nbsp;Min Freq.: <span id="min-frequency" class="nowrap"
>&nbsp;&nbsp;&nbsp;Min Freq.: <span id="min-frequency" class="nowrap"
>{minFrequency}</span
></label
>
<label for="max-frequency"
>&nbsp;&nbsp;Max Freq.: <span id="max-frequency" class="nowrap"
>&nbsp;&nbsp;&nbsp;Max Freq.: <span id="max-frequency" class="nowrap"
>{maxFrequency}</span
></label
>
<label for="mean-frequency"
>&nbsp;Mean Freq.: <span id="mean-frequency" class="nowrap"
>&nbsp;&nbsp;Mean Freq.: <span id="mean-frequency" class="nowrap"
>{mean.toFixed(2)}</span
></label
>
<label for="variance"
>&nbsp;&nbsp;&nbsp;Variance: <span id="variance" class="nowrap"
>&nbsp;&nbsp;&nbsp;&nbsp;Variance: <span id="variance" class="nowrap"
>{variance.toFixed(2)}</span
></label
>
<label for="stddev"
>&nbsp;&nbsp;Std. Dev.: <span id="stddev" class="nowrap"
>&nbsp;&nbsp;&nbsp;Std. Dev.: <span id="stddev" class="nowrap"
>{stdDev.toFixed(2)}</span
></label
>
<label for="byte-count"
>&nbsp;Byte Count: <span id="byte-count" class="nowrap">{sum}</span
>&nbsp;&nbsp;Byte Count: <span id="byte-count" class="nowrap">{sum}</span
></label
>
<label for="distinct-count"
>&nbsp;&nbsp;&nbsp;Distinct: <span id="distinct-count" class="nowrap"
>&nbsp;&nbsp;&nbsp;&nbsp;Distinct: <span id="distinct-count" class="nowrap"
>{numDistinct}</span
></label
>
<label for="dos_eol-count"
>&nbsp;&nbsp;&nbsp;&nbsp;DOS EOL: <span id="dos_eol-count" class="nowrap"
>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;DOS EOL: <span id="dos_eol-count" class="nowrap"
>{byteProfile[PROFILE_DOS_EOL]}</span
></label
>
<label for="ascii-count"
>ASCII Count: <span id="ascii-count" class="nowrap">{numAscii}</span
>&nbsp;ASCII Count: <span id="ascii-count" class="nowrap">{numAscii}</span
></label
>
<label for="ascii-percent"
>&nbsp;&nbsp;&nbsp;&nbsp;% ASCII: <span id="ascii-percent" class="nowrap"
>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;% ASCII: <span id="ascii-percent" class="nowrap"
>{((numAscii / sum) * 100).toFixed(2)}</span
>
</label>
</div>
<hr />
<div class="char-count">
<label for="char-count-bom">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;BOM: <span id="char-count-bom" class="nowrap">{characterCountData.byteOrderMark}</span></label>
<label for="char-count-bom-bytes">&nbsp;&nbsp;BOM Bytes: <span id="char-count-bom-bytes" class="nowrap">{characterCountData.byteOrderMarkBytes}</span></label>
<label for="char-count-single">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Single: <span id="char-count-single" class="nowrap">{characterCountData.singleByteCount}</span></label>
<label for="char-count-double">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Double: <span id="char-count-double" class="nowrap">{characterCountData.doubleByteCount}</span></label>
<label for="char-count-triple">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Triple: <span id="char-count-triple" class="nowrap">{characterCountData.tripleByteCount}</span></label>
<label for="char-count-quad">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Quad: <span id="char-count-quad" class="nowrap">{characterCountData.quadByteCount}</span></label>
<label for="char-count-invalid">&nbsp;&nbsp;&nbsp;&nbsp;Invalid: <span id="char-count-invalid" class="nowrap">{characterCountData.invalidBytes}</span></label>
</div>
<hr />
<Button fn={handleCsvProfileDownload} description="Download profiled data as .csv">
18 changes: 17 additions & 1 deletion src/svelte/src/components/Header/fieldsets/FileMetrics.svelte
Original file line number Diff line number Diff line change
@@ -27,6 +27,7 @@ limitations under the License.
import { humanReadableByteLength } from '../../../utilities/display'
import { DATA_PROFILE_MAX_LENGTH } from '../../../stores/configuration'
import Tooltip from '../../layouts/Tooltip.svelte'
import ISO6391 from 'iso-639-1'
const eventDispatcher = createEventDispatcher()
let displayOpts = false
@@ -74,6 +75,9 @@ limitations under the License.
if ('type' in msg.data.data) {
$fileMetrics.type = msg.data.data.type
}
if ('language' in msg.data.data) {
$fileMetrics.language = msg.data.data.language
}
if ('diskFileSize' in msg.data.data) {
$fileMetrics.diskSize = msg.data.data.diskFileSize
}
@@ -182,7 +186,19 @@ limitations under the License.
</FlexContainer>
<FlexContainer --dir="column">
<label for="content_type">Content Type</label>
<span id="content_type" class="nowrap">{$fileMetrics.type}</span>
<Tooltip
description="{$fileMetrics.type}"
alwaysEnabled={true}>
<span id="content_type" class="nowrap">{$fileMetrics.type.split('/').pop()}</span>
</Tooltip>
</FlexContainer>
<FlexContainer --dir="column">
<label for="language">Language</label>
<Tooltip
description="{ISO6391.getName($fileMetrics.language)}"
alwaysEnabled={true}>
<span id="language" class="nowrap">{$fileMetrics.language}</span>
</Tooltip>
</FlexContainer>
</FlexContainer>
<hr />
1 change: 1 addition & 0 deletions src/svelte/src/components/Header/fieldsets/FileMetrics.ts
Original file line number Diff line number Diff line change
@@ -18,6 +18,7 @@ import { SimpleWritable } from '../../../stores/localStore'
class FileMetricsData {
name: string = ''
type: string = ''
language: string = ''
diskSize: number = 0
computedSize: number = 0
changeCount: number = 0
13 changes: 13 additions & 0 deletions src/svelte/src/components/Header/fieldsets/Settings.svelte
Original file line number Diff line number Diff line change
@@ -28,7 +28,20 @@ limitations under the License.
import FlexContainer from '../../layouts/FlexContainer.svelte'
import { UIThemeCSSClass } from '../../../utilities/colorScheme'
import ViewportVisibilityIcon from '../../Icons/ViewportVisibilityIcon.svelte'
import {MessageCommand} from "../../../utilities/message";
// Keep the editor encoding in sync with the byte order mark carried by
// fileInfo messages. Only the 'UTF-8' and 'UTF-16LE' marks change the encoding.
window.addEventListener('message', (msg) => {
  if (msg.data.command !== MessageCommand.fileInfo) return

  const fileInfo = msg.data.data
  if (!('byteOrderMark' in fileInfo)) return

  // Any other byte order mark value leaves the current encoding untouched.
  switch (fileInfo.byteOrderMark) {
    case 'UTF-8':
      $editorEncoding = 'utf-8'
      break
    case 'UTF-16LE':
      $editorEncoding = 'utf-16le'
      break
  }
})
</script>

<fieldset>
5 changes: 5 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
@@ -2046,6 +2046,11 @@ isexe@^2.0.0:
resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10"
integrity sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==

iso-639-1@^3.1.0:
version "3.1.0"
resolved "https://registry.yarnpkg.com/iso-639-1/-/iso-639-1-3.1.0.tgz#62611c680eba80ccedb57c3fa00d048f7c866693"
integrity sha512-rWcHp9dcNbxa5C8jA/cxFlWNFNwy5Vup0KcFvgA8sPQs9ZeJHj/Eq0Y8Yz2eL8XlWYpxw4iwh9FfTeVxyqdRMw==

isobject@^3.0.1:
version "3.0.1"
resolved "https://registry.yarnpkg.com/isobject/-/isobject-3.0.1.tgz#4e431e92b11a9731636aa1f9c8d1ccbcfdab78df"

0 comments on commit 9e6be18

Please sign in to comment.