Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tesseract and foreign object rendering #86

Merged
merged 12 commits into from
Apr 11, 2023
3 changes: 3 additions & 0 deletions NOTICE.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
OpenSearch (https://opensearch.org/)
Copyright OpenSearch Contributors

This product includes software developed by
naptha (https://github.com/naptha/tesseract.js/)
2 changes: 2 additions & 0 deletions common/tesseract/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*
!.gitignore
5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"cypress:run": "cypress run",
"cypress:open": "cypress open",
"plugin-helpers": "node ../../scripts/plugin_helpers",
"postinstall": "node ./scripts/patch-html2canvas.js"
"postinstall": "node ./scripts/postinstall.js"
},
"dependencies": {
"babel-polyfill": "^6.26.0",
Expand All @@ -38,7 +38,8 @@
"react-router-dom": "^5.3.0",
"react-toast-notifications": "^2.4.0",
"set-interval-async": "1.0.33",
"showdown": "^1.9.1"
"showdown": "^1.9.1",
"tesseract.js": "^4.0.2"
},
"devDependencies": {
"@elastic/eslint-import-resolver-kibana": "link:../../packages/osd-eslint-import-resolver-opensearch-dashboards",
Expand Down
54 changes: 27 additions & 27 deletions public/components/context_menu/context_menu_ui.js

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions public/components/visual_report/assets/report_styles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ html,
body {
margin: 0;
padding: 0;
padding-top: 0px;
}

iframe, embed, object {
Expand Down
138 changes: 105 additions & 33 deletions public/components/visual_report/generate_report.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import createDOMPurify from 'dompurify';
import html2canvas from 'html2canvas';
import jsPDF from 'jspdf';
import { createWorker } from 'tesseract.js';
import { v1 as uuidv1 } from 'uuid';
import { ReportSchemaType } from '../../../server/model';
import { uiSettingsService } from '../utils/settings_service';
Expand Down Expand Up @@ -57,15 +58,16 @@ const removeNonReportElements = (
reportSource: VISUAL_REPORT_TYPE
) => {
// remove buttons
doc.querySelectorAll("button[class^='euiButton']:not(.visLegend__button)").forEach((e) => e.remove());
doc
.querySelectorAll("button[class^='euiButton']:not(.visLegend__button)")
.forEach((e) => e.remove());
// remove top navBar
doc.querySelectorAll("[class^='euiHeader']").forEach((e) => e.remove());
// remove visualization editor
if (reportSource === VISUAL_REPORT_TYPE.visualization) {
doc.querySelector('[data-test-subj="splitPanelResizer"]')?.remove();
doc.querySelector('.visEditor__collapsibleSidebar')?.remove();
}
doc.body.style.paddingTop = '0px';
};

const addReportHeader = (doc: Document, header: string) => {
Expand Down Expand Up @@ -96,8 +98,10 @@ const addReportFooter = (doc: Document, footer: string) => {

/**
 * Injects the report stylesheet into the given document.
 *
 * A `<style>` element tagged with the `reportInjectedStyles` class is created,
 * filled with `style`, and appended to the first `<head>` of `doc`. The top
 * padding of the document body is then forced to `0px`.
 *
 * @param doc - Document that receives the stylesheet.
 * @param style - Raw CSS text to inject.
 */
const addReportStyle = (doc: Document, style: string) => {
  // The class name lets the injected element be located again later.
  const injectedStyles = Object.assign(document.createElement('style'), {
    className: 'reportInjectedStyles',
    innerHTML: style,
  });
  const headElement = doc.getElementsByTagName('head')[0];
  headElement.appendChild(injectedStyles);
  doc.body.style.paddingTop = '0px';
};

const computeHeight = (height: number, header: string, footer: string) => {
Expand All @@ -115,6 +119,7 @@ const computeHeight = (height: number, header: string, footer: string) => {

export const generateReport = async (id: string, forceDelay = 15000) => {
const http = uiSettingsService.getHttpClient();
const useForeignObjectRendering = uiSettingsService.get('reporting:useFOR');
const DOMPurify = createDOMPurify(window);

const report = await http.get<ReportSchemaType>(
Expand Down Expand Up @@ -154,6 +159,26 @@ export const generateReport = async (id: string, forceDelay = 15000) => {
}
await timeout(forceDelay);

// Style changes made in onclone do not take effect when foreign object
// rendering is enabled, so they are applied to the live document instead.
// Additionally, widen spans to prevent their text from being truncated.
if (useForeignObjectRendering) {
document
.querySelectorAll<HTMLSpanElement>('span:not([data-html2canvas-ignore])')
.forEach((el) => {
if (!el.closest('.globalFilterItem'))
el.style.width = el.offsetWidth + 30 + 'px';
});
document
.querySelectorAll<HTMLSpanElement>(
'span.globalFilterItem:not([data-html2canvas-ignore])'
)
.forEach((el) => (el.style.width = el.offsetWidth + 5 + 'px'));
addReportHeader(document, header);
addReportFooter(document, footer);
addReportStyle(document, reportingStyle);
await timeout(1000);
}

const width = document.documentElement.scrollWidth;
const height = computeHeight(
document.documentElement.scrollHeight,
Expand All @@ -170,40 +195,87 @@ export const generateReport = async (id: string, forceDelay = 15000) => {
imageTimeout: 30000,
useCORS: true,
removeContainer: false,
allowTaint: true,
foreignObjectRendering: useForeignObjectRendering,
onclone: function (documentClone) {
removeNonReportElements(documentClone, reportSource);
addReportHeader(documentClone, header);
addReportFooter(documentClone, footer);
addReportStyle(documentClone, reportingStyle);
if (!useForeignObjectRendering) {
addReportHeader(documentClone, header);
addReportFooter(documentClone, footer);
addReportStyle(documentClone, reportingStyle);
}
},
}).then(function (canvas) {
// TODO remove this and 'removeContainer: false' when https://github.com/niklasvh/html2canvas/pull/2949 is merged
document
.querySelectorAll<HTMLIFrameElement>('.html2canvas-container')
.forEach((e) => {
const iframe = e.contentWindow;
if (e) {
e.src = 'about:blank';
if (iframe) {
iframe.document.write('');
iframe.document.clear();
iframe.close();
})
.then(async function (canvas) {
// TODO remove this and 'removeContainer: false' when https://github.com/niklasvh/html2canvas/pull/2949 is merged
document
.querySelectorAll<HTMLIFrameElement>('.html2canvas-container')
.forEach((e) => {
const iframe = e.contentWindow;
if (e) {
e.src = 'about:blank';
if (iframe) {
iframe.document.write('');
iframe.document.clear();
iframe.close();
}
e.remove();
}
e.remove();
}
});
});

if (format === 'png') {
const link = document.createElement('a');
link.download = fileName;
link.href = canvas.toDataURL();
link.click();
} else {
const orient = canvas.width > canvas.height ? 'landscape' : 'portrait';
const pdf = new jsPDF(orient, 'px', [canvas.width, canvas.height]);
pdf.addImage(canvas, 'JPEG', 0, 0, canvas.width, canvas.height);
pdf.save(fileName);
}
return true;
});
if (format === 'png') {
const link = document.createElement('a');
link.download = fileName;
link.href = canvas.toDataURL();
link.click();
} else if (uiSettingsService.get('reporting:useOcr')) {
const worker = await createWorker({
workerPath: '../api/reporting/tesseract.js/dist/worker.min.js',
langPath: '../api/reporting/tesseract-lang-data',
corePath: '../api/reporting/tesseract.js-core/tesseract-core.wasm.js',
});
await worker.loadLanguage('eng');
await worker.initialize('eng');
const {
data: { text, pdf },
} = await worker
.recognize(canvas.toDataURL(), { pdfTitle: fileName }, { pdf: true })
.catch((e) => console.error('recognize', e));
await worker.terminate();

const blob = new Blob([new Uint8Array(pdf)], {
type: 'application/pdf',
});
const link = document.createElement('a');
if (link.download !== undefined) {
const url = URL.createObjectURL(blob);
link.setAttribute('href', url);
link.setAttribute('download', fileName);
link.style.visibility = 'hidden';
document.body.appendChild(link);
link.click();
document.body.removeChild(link);
}
} else {
const orient = canvas.width > canvas.height ? 'landscape' : 'portrait';
const pdf = new jsPDF(orient, 'px', [canvas.width, canvas.height]);
pdf.addImage(canvas, 'JPEG', 0, 0, canvas.width, canvas.height);
pdf.save(fileName);
}
return true;
})
.finally(() => {
  // With foreign object rendering the live document itself is modified before
  // the capture (span widths, injected styles, body padding), so those
  // changes are undone here whether or not report generation succeeded.
  if (useForeignObjectRendering) {
    // Use the same attribute selector as the widening step. The previous
    // `.data-html2canvas-ignore` negated a class name, which matched every
    // span, including ignored ones whose width was never touched.
    document
      .querySelectorAll<HTMLSpanElement>(
        'span:not([data-html2canvas-ignore])'
      )
      .forEach((el) => (el.style.width = ''));
    // NOTE(review): presumably the wrappers added by the report
    // header/footer helpers — confirm against those helpers.
    document.querySelectorAll('.reportWrapper').forEach((e) => e.remove());
    // Remove the <style> elements tagged by addReportStyle.
    document
      .querySelectorAll('.reportInjectedStyles')
      .forEach((e) => e.remove());
    document.body.style.paddingTop = '';
  }
});
};
15 changes: 15 additions & 0 deletions scripts/patch-html2canvas.js → scripts/postinstall.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

// @ts-check
// workaround for Safari support before https://github.com/niklasvh/html2canvas/pull/2911 is merged
const https = require('https');
const fs = require('fs');
const replace = require('replace-in-file');

const options = {
Expand All @@ -31,3 +33,16 @@ try {
error
);
}

// download tesseract model
// Fetches the English trained-data archive into common/tesseract at install
// time. The download is best-effort: failures are logged instead of
// surfacing as an unhandled 'error' event.
const modelPath = __dirname + '/../common/tesseract/eng.traineddata.gz';
const modelUrl =
  'https://raw.githubusercontent.com/naptha/tessdata/gh-pages/4.0.0_best/eng.traineddata.gz';

const logModelDownloadFailure = (reason) => {
  console.error(
    'Failed to download eng.traineddata.gz for tesseract.js:',
    reason
  );
};

https
  .get(modelUrl, function (response) {
    if (response.statusCode !== 200) {
      // Drain the body so the socket is released, and do not touch the target
      // file: an error page must never be stored as the model.
      response.resume();
      logModelDownloadFailure('unexpected HTTP status ' + response.statusCode);
      return;
    }

    // Open the target only once a successful response has arrived, so a
    // failed request cannot truncate a previously downloaded model.
    const modelFile = fs.createWriteStream(modelPath);
    const discardPartialFile = (error) => {
      logModelDownloadFailure(error);
      modelFile.destroy();
      // Ignore unlink errors: the file may not have been created yet.
      fs.unlink(modelPath, () => {});
    };

    response.on('error', discardPartialFile);
    modelFile.on('error', discardPartialFile);
    modelFile.on('finish', () => {
      modelFile.close();
      console.log('Downloaded eng.traineddata.gz for tesseract.js');
    });
    response.pipe(modelFile);
  })
  // Connection-level failures (DNS, TLS, offline) are emitted on the request.
  .on('error', logModelDownloadFailure);
32 changes: 25 additions & 7 deletions server/plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,24 @@
* SPDX-License-Identifier: Apache-2.0
*/

import { schema } from '@osd/config-schema';
import {
PluginInitializerContext,
CoreSetup,
CoreStart,
Plugin,
Logger,
ILegacyClusterClient,
Logger,
Plugin,
PluginInitializerContext,
} from '../../../src/core/server';
import opensearchReportsPlugin from './backend/opensearch-reports-plugin';
import { NotificationsPlugin } from './clusters/notificationsPlugin';
import { buildConfig, ReportingConfigType } from './config';
import { ReportingConfig } from './config/config';
import registerRoutes from './routes';
import {
ReportsDashboardsPluginSetup,
ReportsDashboardsPluginStart,
} from './types';
import registerRoutes from './routes';
import { NotificationsPlugin } from './clusters/notificationsPlugin';
import { buildConfig, ReportingConfigType } from './config';
import { ReportingConfig } from './config/config';

export interface ReportsPluginRequestContext {
logger: Logger;
Expand Down Expand Up @@ -49,6 +50,23 @@ export class ReportsDashboardsPlugin
public async setup(core: CoreSetup) {
this.logger.debug('reports-dashboards: Setup');

core.uiSettings.register({
'reporting:useOcr': {
name: 'Reporting use OCR on PDF',
value: false,
description:
'Whether to run optical character recognition on PDF reports to make text selectable',
schema: schema.boolean(),
},
'reporting:useFOR': {
name: 'Reporting use ForeignObject rendering',
value: true,
description:
'Whether to use ForeignObject rendering when generating reports. If it causes issues, try disabling this option.',
schema: schema.boolean(),
},
});

try {
const config = await buildConfig(
this.initializerContext,
Expand Down
2 changes: 2 additions & 0 deletions server/routes/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import registerReportDefinitionRoute from './reportDefinition';
import registerReportSourceRoute from './reportSource';
import registerMetricRoute from './metric';
import registerNotificationRoute from './notifications';
import registerTesseractRoute from './tesseract';
import { IRouter } from '../../../../src/core/server';
import { ReportingConfig } from 'server/config/config';

Expand All @@ -17,4 +18,5 @@ export default function (router: IRouter, config: ReportingConfig) {
registerReportSourceRoute(router);
registerMetricRoute(router);
registerNotificationRoute(router);
registerTesseractRoute(router);
}
Loading