Skip to content

Commit

Permalink
Add tesseract and foreign object rendering (opensearch-project#86) (o…
Browse files Browse the repository at this point in the history
…pensearch-project#87)

Signed-off-by: Joshua Li <[email protected]>
(cherry picked from commit 503ee051dd909a970aaf0864114624d805a1a2cf)

Co-authored-by: Joshua Li <[email protected]>
  • Loading branch information
opensearch-trigger-bot[bot] and joshuali925 authored Apr 11, 2023
1 parent dd7221e commit b746a0e
Show file tree
Hide file tree
Showing 11 changed files with 543 additions and 70 deletions.
3 changes: 3 additions & 0 deletions NOTICE.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
OpenSearch (https://opensearch.org/)
Copyright OpenSearch Contributors

This product includes software developed by
naptha (https://github.com/naptha/tesseract.js/)
2 changes: 2 additions & 0 deletions common/tesseract/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*
!.gitignore
5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"cypress:run": "cypress run",
"cypress:open": "cypress open",
"plugin-helpers": "node ../../scripts/plugin_helpers",
"postinstall": "node ./scripts/patch-html2canvas.js"
"postinstall": "node ./scripts/postinstall.js"
},
"dependencies": {
"babel-polyfill": "^6.26.0",
Expand All @@ -38,7 +38,8 @@
"react-router-dom": "^5.3.0",
"react-toast-notifications": "^2.4.0",
"set-interval-async": "1.0.33",
"showdown": "^1.9.1"
"showdown": "^1.9.1",
"tesseract.js": "^4.0.2"
},
"devDependencies": {
"@elastic/eslint-import-resolver-kibana": "link:../../packages/osd-eslint-import-resolver-opensearch-dashboards",
Expand Down
54 changes: 27 additions & 27 deletions public/components/context_menu/context_menu_ui.js

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions public/components/visual_report/assets/report_styles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ html,
body {
margin: 0;
padding: 0;
padding-top: 0px;
}
iframe, embed, object {
Expand Down
138 changes: 105 additions & 33 deletions public/components/visual_report/generate_report.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import createDOMPurify from 'dompurify';
import html2canvas from 'html2canvas';
import jsPDF from 'jspdf';
import { createWorker } from 'tesseract.js';
import { v1 as uuidv1 } from 'uuid';
import { ReportSchemaType } from '../../../server/model';
import { uiSettingsService } from '../utils/settings_service';
Expand Down Expand Up @@ -57,15 +58,16 @@ const removeNonReportElements = (
reportSource: VISUAL_REPORT_TYPE
) => {
// remove buttons
doc.querySelectorAll("button[class^='euiButton']:not(.visLegend__button)").forEach((e) => e.remove());
doc
.querySelectorAll("button[class^='euiButton']:not(.visLegend__button)")
.forEach((e) => e.remove());
// remove top navBar
doc.querySelectorAll("[class^='euiHeader']").forEach((e) => e.remove());
// remove visualization editor
if (reportSource === VISUAL_REPORT_TYPE.visualization) {
doc.querySelector('[data-test-subj="splitPanelResizer"]')?.remove();
doc.querySelector('.visEditor__collapsibleSidebar')?.remove();
}
doc.body.style.paddingTop = '0px';
};

const addReportHeader = (doc: Document, header: string) => {
Expand Down Expand Up @@ -96,8 +98,10 @@ const addReportFooter = (doc: Document, footer: string) => {

/**
 * Injects a block of CSS into the `<head>` of the given document and resets
 * the top padding of its body.
 *
 * The `<style>` element is created through the global `document` (not `doc`)
 * and tagged with the `reportInjectedStyles` class name before being appended.
 *
 * @param doc - Document whose first `<head>` element receives the style node.
 * @param style - Raw CSS text placed inside the injected `<style>` element.
 */
const addReportStyle = (doc: Document, style: string) => {
  const injectedStyle = Object.assign(document.createElement('style'), {
    className: 'reportInjectedStyles',
    innerHTML: style,
  });

  const headElement = doc.getElementsByTagName('head')[0];
  headElement.appendChild(injectedStyle);

  // Force the body's top padding to zero via an inline style.
  doc.body.style.paddingTop = '0px';
};

const computeHeight = (height: number, header: string, footer: string) => {
Expand All @@ -115,6 +119,7 @@ const computeHeight = (height: number, header: string, footer: string) => {

export const generateReport = async (id: string, forceDelay = 15000) => {
const http = uiSettingsService.getHttpClient();
const useForeignObjectRendering = uiSettingsService.get('reporting:useFOR');
const DOMPurify = createDOMPurify(window);

const report = await http.get<ReportSchemaType>(
Expand Down Expand Up @@ -154,6 +159,26 @@ export const generateReport = async (id: string, forceDelay = 15000) => {
}
await timeout(forceDelay);

// Style changes made in onclone do not take effect when foreign object rendering
// is enabled, so they are applied to the live document instead.
// Additionally, increase span widths to prevent text from being truncated.
if (useForeignObjectRendering) {
document
.querySelectorAll<HTMLSpanElement>('span:not([data-html2canvas-ignore])')
.forEach((el) => {
if (!el.closest('.globalFilterItem'))
el.style.width = el.offsetWidth + 30 + 'px';
});
document
.querySelectorAll<HTMLSpanElement>(
'span.globalFilterItem:not([data-html2canvas-ignore])'
)
.forEach((el) => (el.style.width = el.offsetWidth + 5 + 'px'));
addReportHeader(document, header);
addReportFooter(document, footer);
addReportStyle(document, reportingStyle);
await timeout(1000);
}

const width = document.documentElement.scrollWidth;
const height = computeHeight(
document.documentElement.scrollHeight,
Expand All @@ -170,40 +195,87 @@ export const generateReport = async (id: string, forceDelay = 15000) => {
imageTimeout: 30000,
useCORS: true,
removeContainer: false,
allowTaint: true,
foreignObjectRendering: useForeignObjectRendering,
onclone: function (documentClone) {
removeNonReportElements(documentClone, reportSource);
addReportHeader(documentClone, header);
addReportFooter(documentClone, footer);
addReportStyle(documentClone, reportingStyle);
if (!useForeignObjectRendering) {
addReportHeader(documentClone, header);
addReportFooter(documentClone, footer);
addReportStyle(documentClone, reportingStyle);
}
},
}).then(function (canvas) {
// TODO remove this and 'removeContainer: false' when https://github.com/niklasvh/html2canvas/pull/2949 is merged
document
.querySelectorAll<HTMLIFrameElement>('.html2canvas-container')
.forEach((e) => {
const iframe = e.contentWindow;
if (e) {
e.src = 'about:blank';
if (iframe) {
iframe.document.write('');
iframe.document.clear();
iframe.close();
})
.then(async function (canvas) {
// TODO remove this and 'removeContainer: false' when https://github.com/niklasvh/html2canvas/pull/2949 is merged
document
.querySelectorAll<HTMLIFrameElement>('.html2canvas-container')
.forEach((e) => {
const iframe = e.contentWindow;
if (e) {
e.src = 'about:blank';
if (iframe) {
iframe.document.write('');
iframe.document.clear();
iframe.close();
}
e.remove();
}
e.remove();
}
});
});

if (format === 'png') {
const link = document.createElement('a');
link.download = fileName;
link.href = canvas.toDataURL();
link.click();
} else {
const orient = canvas.width > canvas.height ? 'landscape' : 'portrait';
const pdf = new jsPDF(orient, 'px', [canvas.width, canvas.height]);
pdf.addImage(canvas, 'JPEG', 0, 0, canvas.width, canvas.height);
pdf.save(fileName);
}
return true;
});
if (format === 'png') {
const link = document.createElement('a');
link.download = fileName;
link.href = canvas.toDataURL();
link.click();
} else if (uiSettingsService.get('reporting:useOcr')) {
const worker = await createWorker({
workerPath: '../api/reporting/tesseract.js/dist/worker.min.js',
langPath: '../api/reporting/tesseract-lang-data',
corePath: '../api/reporting/tesseract.js-core/tesseract-core.wasm.js',
});
await worker.loadLanguage('eng');
await worker.initialize('eng');
const {
data: { text, pdf },
} = await worker
.recognize(canvas.toDataURL(), { pdfTitle: fileName }, { pdf: true })
.catch((e) => console.error('recognize', e));
await worker.terminate();

const blob = new Blob([new Uint8Array(pdf)], {
type: 'application/pdf',
});
const link = document.createElement('a');
if (link.download !== undefined) {
const url = URL.createObjectURL(blob);
link.setAttribute('href', url);
link.setAttribute('download', fileName);
link.style.visibility = 'hidden';
document.body.appendChild(link);
link.click();
document.body.removeChild(link);
}
} else {
const orient = canvas.width > canvas.height ? 'landscape' : 'portrait';
const pdf = new jsPDF(orient, 'px', [canvas.width, canvas.height]);
pdf.addImage(canvas, 'JPEG', 0, 0, canvas.width, canvas.height);
pdf.save(fileName);
}
return true;
})
.finally(() => {
if (useForeignObjectRendering) {
document
.querySelectorAll<HTMLSpanElement>(
'span:not(.data-html2canvas-ignore)'
)
.forEach((el) => (el.style.width = ''));
document.querySelectorAll('.reportWrapper').forEach((e) => e.remove());
document
.querySelectorAll('.reportInjectedStyles')
.forEach((e) => e.remove());
document.body.style.paddingTop = '';
}
});
};
15 changes: 15 additions & 0 deletions scripts/patch-html2canvas.js → scripts/postinstall.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

// @ts-check
// workaround for Safari support before https://github.com/niklasvh/html2canvas/pull/2911 is merged
const https = require('https');
const fs = require('fs');
const replace = require('replace-in-file');

const options = {
Expand All @@ -31,3 +33,16 @@ try {
error
);
}

// download tesseract model
// Best-effort step: a failed download is reported on stderr but does not throw,
// so an unreachable network does not crash the postinstall script.
const modelUrl =
  'https://raw.githubusercontent.com/naptha/tessdata/gh-pages/4.0.0_best/eng.traineddata.gz';
const modelPath = __dirname + '/../common/tesseract/eng.traineddata.gz';

/**
 * Logs a failed attempt to fetch the tesseract language model.
 *
 * @param {unknown} reason - error object or message describing the failure
 */
const reportModelDownloadFailure = (reason) => {
  console.error('Failed to download eng.traineddata.gz for tesseract.js:', reason);
};

https
  .get(modelUrl, function (response) {
    // Only a 200 response carries the model; anything else (404, redirect, ...)
    // must not be written to disk as if it were the trained data.
    if (response.statusCode !== 200) {
      // Drain the body so the underlying socket is released.
      response.resume();
      reportModelDownloadFailure('unexpected HTTP status ' + response.statusCode);
      return;
    }

    // Open the target file only once there is real data to write, so a failed
    // request does not leave an empty model file behind.
    const modelFile = fs.createWriteStream(modelPath);

    /**
     * Stops the transfer and removes the partially written file.
     *
     * @param {Error} error - error emitted by the response or the file stream
     */
    const abortDownload = (error) => {
      reportModelDownloadFailure(error);
      response.unpipe(modelFile);
      modelFile.destroy();
      // Ignore unlink errors: the file may not exist yet.
      fs.unlink(modelPath, () => {});
    };

    response.on('error', abortDownload);
    modelFile.on('error', abortDownload);
    modelFile.on('finish', () => {
      modelFile.close();
      console.log('Downloaded eng.traineddata.gz for tesseract.js');
    });
    response.pipe(modelFile);
  })
  // Without this listener a DNS/connection failure is an uncaught 'error' event.
  .on('error', reportModelDownloadFailure);
32 changes: 25 additions & 7 deletions server/plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,24 @@
* SPDX-License-Identifier: Apache-2.0
*/

import { schema } from '@osd/config-schema';
import {
PluginInitializerContext,
CoreSetup,
CoreStart,
Plugin,
Logger,
ILegacyClusterClient,
Logger,
Plugin,
PluginInitializerContext,
} from '../../../src/core/server';
import opensearchReportsPlugin from './backend/opensearch-reports-plugin';
import { NotificationsPlugin } from './clusters/notificationsPlugin';
import { buildConfig, ReportingConfigType } from './config';
import { ReportingConfig } from './config/config';
import registerRoutes from './routes';
import {
ReportsDashboardsPluginSetup,
ReportsDashboardsPluginStart,
} from './types';
import registerRoutes from './routes';
import { NotificationsPlugin } from './clusters/notificationsPlugin';
import { buildConfig, ReportingConfigType } from './config';
import { ReportingConfig } from './config/config';

export interface ReportsPluginRequestContext {
logger: Logger;
Expand Down Expand Up @@ -49,6 +50,23 @@ export class ReportsDashboardsPlugin
public async setup(core: CoreSetup) {
this.logger.debug('reports-dashboards: Setup');

core.uiSettings.register({
'reporting:useOcr': {
name: 'Reporting use OCR on PDF',
value: false,
description:
'Whether to run optical character recognition on PDF reports to make text selectable',
schema: schema.boolean(),
},
'reporting:useFOR': {
name: 'Reporting use ForeignObject rendering',
value: true,
description:
'Whether to use ForeignObject rendering when generating reports. If it causes issues, try disabling this option.',
schema: schema.boolean(),
},
});

try {
const config = await buildConfig(
this.initializerContext,
Expand Down
2 changes: 2 additions & 0 deletions server/routes/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import registerReportDefinitionRoute from './reportDefinition';
import registerReportSourceRoute from './reportSource';
import registerMetricRoute from './metric';
import registerNotificationRoute from './notifications';
import registerTesseractRoute from './tesseract';
import { IRouter } from '../../../../src/core/server';
import { ReportingConfig } from 'server/config/config';

Expand All @@ -17,4 +18,5 @@ export default function (router: IRouter, config: ReportingConfig) {
registerReportSourceRoute(router);
registerMetricRoute(router);
registerNotificationRoute(router);
registerTesseractRoute(router);
}
Loading

0 comments on commit b746a0e

Please sign in to comment.