Skip to content

Commit

Permalink
Implement OnLoadingFailed in CDP
Browse files Browse the repository at this point in the history
Fix infinete loop when there is two or more elements with an absolute path in the dom
Fix the data we send in JSDOM when an error happens
  • Loading branch information
sarvaje authored and molant committed Apr 28, 2017
1 parent dd85fef commit c281939
Show file tree
Hide file tree
Showing 7 changed files with 120 additions and 32 deletions.
64 changes: 48 additions & 16 deletions src/lib/collectors/cdp/cdp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import { debug as d } from '../../utils/debug';
/* eslint-disable no-unused-vars */
import {
ICollector, ICollectorBuilder,
IElementFoundEvent, IFetchEndEvent, IManifestFetchEnd, IManifestFetchErrorEvent, ITraverseUpEvent, ITraverseDownEvent,
IElementFoundEvent, IFetchEndEvent, IFetchErrorEvent, IManifestFetchEnd, IManifestFetchErrorEvent, ITraverseUpEvent, ITraverseDownEvent,
INetworkData, URL
} from '../../types';
/* eslint-enable */
Expand Down Expand Up @@ -84,10 +84,11 @@ class CDPCollector implements ICollector {
// ------------------------------------------------------------------------------

private async getElementFromParser(parts: Array<string>): Promise<CDPAsyncHTMLElement> {
let basename: string = parts.pop();
let basename: string = null;
let elements: Array<CDPAsyncHTMLElement> = [];

while (parts.length >= 0) {
while (parts.length > 0) {
basename = !basename ? parts.pop() : `${parts.pop()}/${basename}`;
const query = `[src$="${basename}"],[href$="${basename}"]`;
const newElements = await this._dom.querySelectorAll(query);

Expand All @@ -101,12 +102,8 @@ class CDPCollector implements ICollector {
}

// Just one element easy peasy
if (elements.length === 1) {
return elements[0];
}

if (parts.length > 0) {
basename = `${parts.pop()}/${basename}`;
if (newElements.length === 1) {
return newElements[0];
}

elements = newElements;
Expand All @@ -115,7 +112,7 @@ class CDPCollector implements ICollector {
/* If we reach this point, we have several elements that have the same url so we return the first
because its the one that started the request. */

return Promise.resolve(elements[0]);
return elements[0];
}

/** Returns the IAsyncHTMLElement that initiated a request */
Expand Down Expand Up @@ -170,24 +167,54 @@ class CDPCollector implements ICollector {

/** Event handler fired when HTTP request fails for some reason. */
private async onLoadingFailed(params) {
// TODO: implement this for `fetch::error` and `targetfetch::error`.
// console.error(params);
const { request: { url: resource } } = this._requests.get(params.requestId);

if (params.type === 'Manifest') {
const { request: { url: resource } } = this._requests.get(params.requestId);
const event: IManifestFetchErrorEvent = {
error: new Error(params.errorText),
resource
};

await this._server.emitAsync('manifestfetch::error', event);

return;
}

// DOM is not ready so we queue up the event for later
if (!this._dom) {
this._pendingResponseReceived.push(this.onLoadingFailed.bind(this, params));

return;
}

/* If `requestId` is not in `this._requests` it means that we already processed the request in `onResponseReceived` */
if (!this._requests.has(params.requestId)) {
debug(`requestId doesn't exist, skipping this error`);

return;
}

debug(`Error found:\n${JSON.stringify(params)}`);
const element = await this.getElementFromRequest(params.requestId);
const { request: { url: resource } } = this._requests.get(params.requestId);
const eventName = this._href === resource ? 'targetfetch::error' : 'fetch::error';

const hops = this._redirects.calculate(resource);

console.log(`Error: ${resource}`);

const event: IFetchErrorEvent = {
element,
error: params,
hops,
resource
};

await this._server.emitAsync(eventName, event);
}

/** Event handler fired when HTTP response is available and DOM loaded. */
private async onResponseReceived(params) {

// DOM is not ready so we queued up the event for later
// DOM is not ready so we queue up the event for later
if (!this._dom) {
this._pendingResponseReceived.push(this.onResponseReceived.bind(this, params));

Expand Down Expand Up @@ -249,6 +276,11 @@ class CDPCollector implements ICollector {
data.element = await this.getElementFromRequest(params.requestId);
}

/* We don't need to store the request anymore so we can remove it and ignore it
* if we receive it in `onLoadingFailed` (used only for "catastrophic" failures).
*/
this._requests.delete(params.requestId);

await this._server.emitAsync(eventName, data);
}

Expand Down
20 changes: 11 additions & 9 deletions src/lib/collectors/jsdom/jsdom.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import {
} from '../../types';
/* eslint-enable */
import { JSDOMAsyncHTMLElement } from './jsdom-async-html';
import * as logger from '../../utils/logging';
import { readFileAsync } from '../../utils/misc';
import { Requester } from '../utils/requester'; //eslint-disable-line
import { Sonar } from '../../sonar'; // eslint-disable-line no-unused-vars
Expand Down Expand Up @@ -192,15 +191,17 @@ class JSDOMCollector implements ICollector {

return callback(null, resourceNetworkData.response.body.content);
} catch (err) {
const hops = this._request.getRedirects(err.uri);
const fetchError: IFetchErrorEvent = {
element: new JSDOMAsyncHTMLElement(resource.element),
error: err,
resource: resourceUrl
error: err.error,
hops,
resource: err.uri || resourceUrl
};

await this._server.emitAsync('fetch::error', fetchError);

return callback(err);
return callback(fetchError);
}
}

Expand Down Expand Up @@ -296,17 +297,18 @@ class JSDOMCollector implements ICollector {

try {
this._targetNetworkData = await this.fetchContent(target);
} catch (e) {
} catch (err) {
const hops = this._request.getRedirects(err.uri);
const fetchError: IFetchErrorEvent = {
element: null,
error: e,
error: err.error ? err.error : err,
hops,
resource: href
};

await this._server.emitAsync('targetfetch::error', fetchError);
logger.error(`Failed to fetch: ${href}`);
debug(e);
reject(e);
debug(`Failed to fetch: ${href}\n${err}`);
reject(fetchError);

return;
}
Expand Down
13 changes: 10 additions & 3 deletions src/lib/collectors/utils/requester.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ export class Requester {
this._request = request.defaults(options);
}

/** Return the redirects for a given `uri`. */
public getRedirects(uri: string): Array<string> {
return this._redirects.calculate(uri);
}

/** Performs a `get` to the given `uri`.
* If `Content-Type` is of type text and the charset is one of those supported by
* [`iconv-lite`](https://github.com/ashtuchkin/iconv-lite/wiki/Supported-Encodings)
Expand All @@ -62,10 +67,12 @@ export class Requester {

this._request({ uri }, async (err, response, rawBody) => {
if (err) {
debug(`Request for ${uri} failed`);
debug(err);
debug(`Request for ${uri} failed\n${err}`);

return reject(err);
return reject({
error: err,
uri
});
}

// We check if we need to redirect and call ourselves again with the new target
Expand Down
2 changes: 2 additions & 0 deletions src/lib/types/events.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ export interface IFetchErrorEvent {
element: IAsyncHTMLElement;
/** The error found. */
error: any;
/** The redirects performed for the url. */
hops: Array<string>
}

/** The object emitted by a collector on `traverse::start` */
Expand Down
48 changes: 44 additions & 4 deletions tests/lib/collectors/_common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,7 @@ import { ICollector, ICollectorBuilder } from '../../../src/lib/types'; // eslin
const testCollector = (collectorBuilder: ICollectorBuilder) => {

/* eslint-disable sort-keys */
/** The minimum set of events the collectors need to implement
*
* We need to add here the `fetch::error` ones
*/
/** The minimum set of events the collectors need to implement. */
const events = [
['targetfetch::start', { resource: 'http://localhost/' }],
['targetfetch::end', {
Expand Down Expand Up @@ -105,6 +102,37 @@ const testCollector = (collectorBuilder: ICollectorBuilder) => {
statusCode: 200,
url: 'http://localhost/style.css'
}
}],
['fetch::start', { resource: 'http://localhost/script4.js' }],
['fetch::end', {
element: {
getAttribute(attr) {
if (attr === 'href') {
return 'style.css';
}

return '';
}
},
resource: 'http://localhost/script4.js',
request: { url: 'http://localhost/script4.js' },
response: {
statusCode: 404,
url: 'http://localhost/script4.js'
}
}],
['fetch::error', {
element: {
getAttribute(attr) {
if (attr === 'href') {
return 'test://fa.il';
}

return '';
}
},
resource: 'test://fa.il',
hops: ['http://localhost/script5.js']
}]
];
/* eslint-enable sort-keys */
Expand Down Expand Up @@ -222,6 +250,11 @@ const testCollector = (collectorBuilder: ICollectorBuilder) => {
content: 'script2.js',
status: 302
},
'/script4.js': {
content: 'script4.js',
status: 404
},
'/script5.js': null,
'/style.css': fs.readFileSync(path.join(__dirname, './fixtures/common/style.css'), 'utf8')
});

Expand All @@ -238,6 +271,13 @@ const testCollector = (collectorBuilder: ICollectorBuilder) => {
invokes.push(emitAsync.getCall(i).args);
}

/* We tests there was just a single `fetch::error` emitted. */
const fetchErrorEvents = _.filter(invokes, (invoke) => {
return invoke[0] === 'fetch::error';
});

t.is(fetchErrorEvents.length, 1);

pendingEvents.forEach((event) => {
t.true(validEvent(invokes, event), `Event ${event[0]}/${event[1].resource} has the same properties`);
});
Expand Down
Binary file added tests/lib/collectors/fixtures/common/edge.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 5 additions & 0 deletions tests/lib/collectors/fixtures/common/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
var a = 'a';
</script>
<script src="/script3.js"></script>
<script src="/script4.js"></script>
<script src="/script5.js"></script>
<style>
h1 {
font-size: 2em;
Expand All @@ -17,5 +19,8 @@
<body>
<h1>Common collector test page</h1>
<p>The content of this HTML is used to validate that all collectors that parse full HTML send the same events.</p>
<p>These two images are to test that we are not entering in a infinite loop when we have more than one element with the same src or href</p>
<img src='http://localhost/edge.png'/>
<img src='http://localhost/edge.png'/>
</body>
</html>

0 comments on commit c281939

Please sign in to comment.