Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Privacy 2024 #118

Closed
wants to merge 21 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 31 additions & 14 deletions dist/ads.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,15 @@ const SELLER_TYPES = ['publisher', 'intermediary', 'both'];
const isPresent = (response, endings) => response.ok && endings.some(ending => response.url.endsWith(ending));

const fetchAndParse = async (url, parser) => {
const timeout = 5000;
/*
Google's sellers.json size is 120 MB as of May 2024 - too big for custom metrics.
It is served from realtimebidding.google.com/sellers.json, so it is not part of the crawled pages list.
More details: https://support.google.com/authorizedbuyers/answer/9895942
*/
const controller = new AbortController();
const { signal } = controller;
setTimeout(() => controller.abort(), 5000);
setTimeout(() => controller.abort(), timeout);

try {
const response = await fetch(url, { signal });
Expand All @@ -29,6 +35,7 @@ const parseAdsTxt = async (response) => {
let result = {
present: isPresent(response, ['/ads.txt', '/app-ads.txt']),
status: response.status,
redirected: response.redirected,
};

if (result.present && content) {
Expand All @@ -48,8 +55,7 @@ const parseAdsTxt = async (response) => {
},
line_count: 0,
variables: new Set(),
variable_count: 0,
redirected: response.redirected,
variable_count: 0
}
};

Expand Down Expand Up @@ -84,9 +90,9 @@ const parseAdsTxt = async (response) => {
// Count unique and remove domain Sets for now
for (let accountType of Object.values(result.account_types)) {
accountType.domain_count = accountType.domains.size;
delete accountType.domains // Keeping a list of domains may be valuable for further research, e.g. accountType.domains = [...accountType.domains];
accountType.domains = Array.from(accountType.domains); // delete accountType.domains
}
result.variables = [...result.variables];
result.variables = Array.from(result.variables);
}

return result;
Expand Down Expand Up @@ -127,32 +133,43 @@ const parseSellersJSON = async (response) => {
seller_count: 0,
}
},
passthrough_count: 0
passthrough_count: 0,
confidential_count: 0
}
};

// Clean up file content
result.seller_count = content.sellers.length;

for (let seller of content.sellers) {
// Seller records
let type = seller.seller_type.trim().toLowerCase(),
domain = seller.domain.trim().toLowerCase();
if (Object.keys(result.seller_types).includes(type)) {
result.seller_types[type].domains.add(domain);
result.seller_types[type].seller_count += 1;
const stype = seller.seller_type.trim().toLowerCase();
// Validating records
if (!SELLER_TYPES.includes(stype) || !seller.seller_id) {
continue;
}

// Passthrough
if (seller.is_passthrough) {
result.passthrough_count += 1;
}

// Confidential
if (seller.is_confidential) {
result.confidential_count += 1;
}

// Seller records
if (seller.domain) {
const domain = seller.domain.trim().toLowerCase();
result.seller_types[stype].domains.add(domain);
result.seller_types[stype].seller_count += 1;
}
}

// Count unique and remove domain Sets for now
for (let seller_type of Object.values(result.seller_types)) {
seller_type.domain_count = seller_type.domains.size;
delete seller_type.domains //seller_type.domains = [...seller_type.domains];
seller_type.domains = Array.from(seller_type.domains); // delete seller_type.domains;
}
};

Expand All @@ -171,6 +188,6 @@ return Promise.all([
});
}).catch(error => {
return JSON.stringify({
error: error
error: error.message
});
});