Skip to content

Commit

Permalink
Merge pull request #710 from microlinkhq/next
Browse files: browse the repository at this point in the history
fix(logo-favicon): verify body first character
  • Loading branch information
Kikobeats authored Jun 18, 2024
2 parents d8bc4a1 + a3665f4 commit 2fa3670
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 11 deletions.
23 changes: 16 additions & 7 deletions packages/metascraper-logo-favicon/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,8 @@ const SIZE_REGEX_BY_X = /\d+x\d+/

const toLogo = toRule(logo)

/**
 * Check whether a response `content-type` header matches one of the allowed
 * content types.
 *
 * The match is a substring test, so a header carrying extra parameters
 * (e.g. `image/svg+xml; charset=utf-8`) still matches `image/svg+xml`.
 *
 * @param {string|undefined} contentType - value of the `content-type` header.
 * @param {string[]} contentTypes - allowed content type substrings.
 * @returns {boolean|string|undefined} `true`/`false` when `contentType` is
 *   truthy; otherwise the falsy `contentType` itself (short-circuit of `&&`).
 */
const isValidContenType = (contentType, contentTypes) => {
return contentType && contentTypes.some(ct => contentType.includes(ct))
}
/**
 * Check whether a response `content-type` header matches one of the allowed
 * content types.
 *
 * The match is a substring test, so a header carrying extra parameters
 * (e.g. `image/svg+xml; charset=utf-8`) still matches `image/svg+xml`.
 *
 * @param {string|undefined} contentType - value of the `content-type` header.
 * @param {string[]} contentTypes - allowed content type substrings.
 * @returns {boolean|string|undefined} `true`/`false` when `contentType` is
 *   truthy; otherwise the falsy `contentType` itself (short-circuit of `&&`).
 */
const isValidContenType = (contentType, contentTypes) =>
contentType && contentTypes.some(ct => contentType.includes(ct))

const toSize = (input, url) => {
if (isEmpty(input)) return
Expand Down Expand Up @@ -105,13 +104,16 @@ const firstReachable = async (domNodeSizes, gotOpts) => {
const contentType = response.headers['content-type']

const urlExtension = extension(url)

const contentTypes = ALLOWED_EXTENSION_CONTENT_TYPES.find(
([ext]) => ext === urlExtension
)

if (contentTypes && !isValidContenType(contentType, contentTypes[1])) {
continue
}
if (
contentTypes &&
(!isValidContenType(contentType, contentTypes[1]) ||
response.body.toString()[0] === '<')
) { continue }

return response.url
}
Expand Down Expand Up @@ -142,7 +144,14 @@ const createFavicon = ([ext, contentTypes]) => {
const response = await reachableUrl(faviconUrl, gotOpts)
if (!reachableUrl.isReachable(response)) return undefined
const contentType = response.headers['content-type']
return isValidContenType(contentType, contentTypes) && response.url

if (
contentTypes &&
(!isValidContenType(contentType, contentTypes) ||
response.body.toString()[0] === '<')
) { return undefined }

return response.url
}
}

Expand Down
2 changes: 1 addition & 1 deletion packages/metascraper-logo-favicon/test/favicon.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ test("don't resolve favicon.ico with no valid content-type", async t => {
res.setHeader('content-type', 'image/svg+xml; charset=utf-8')
res.end('<svg></svg>')
})
t.is(await faviconICO(url), false)
t.is(await faviconICO(url), undefined)
})

test("favicon.png with 'image/png' content-type", async t => {
Expand Down
13 changes: 13 additions & 0 deletions packages/metascraper-logo-favicon/test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,19 @@ test("favicon.ico detected in HTML markup can't be random content-type", async t
t.is(metadata.logo, null)
})

// Regression test: a declared `image/x-icon` content-type is not enough on its
// own. The server below advertises an icon but sends markup as the body, and
// the resulting logo is expected to be rejected (null).
test("don't trust in favicon.ico content-type", async t => {
// NOTE(review): `runServer` is a test helper defined elsewhere; it presumably
// starts a throwaway HTTP server with this handler and resolves to its URL.
const url = await runServer(t, async ({ res }) => {
// Header claims an icon...
res.setHeader('content-type', 'image/x-icon')
// ...but the body starts with '<', i.e. it is markup rather than icon bytes.
res.end('<svg></svg>')
})

// The page markup points at /favicon.ico with a matching declared type.
const html =
'<link rel="icon" href="/favicon.ico" type="image/x-icon" sizes="120x116">'
const metascraper = createMetascraper()
const metadata = await metascraper({ url, html })
// No logo should be resolved from the mislabelled response.
t.is(metadata.logo, null)
})

test('favicon.ico detected in HTML markup can be `image/x-icon` content-type', async t => {
const url = await runServer(t, async ({ res }) => {
res.setHeader('content-type', 'image/x-icon')
Expand Down
5 changes: 3 additions & 2 deletions packages/metascraper/test/integration/substack/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ const url =

test('substack', async t => {
const html = await readFile(resolve(__dirname, 'input.html'))
const { date, ...metadata } = await metascraper({ html, url })
t.is(typeof date, 'string')
const { date, logo, ...metadata } = await metascraper({ html, url })
t.snapshot(metadata)
t.is(typeof date, 'string')
t.true(logo.includes('gstatic'))
})
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ Generated by [AVA](https://avajs.dev).
description: 'The world is a very malleable place. When I read biographies, early lives leap out the most. Leonardo da Vinci was a studio apprentice to Verrocchio at 14. Walt Disney took on a number of jobs, chiefly delivering papers, from 11 years old. Vladimir Nabokov published his first book (a collection of poems) at 16, while still in school. Andrew Carnegie',
image: 'https://substackcdn.com/image/fetch/w_1200,h_600,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fbucketeer-e05bbc84-baa3-437e-9518-adb32be77984.s3.amazonaws.com%2Fpublic%2Fimages%2Fef3bd0df-b9fa-4358-afee-116c23f4c55f_2560x1902.jpeg',
lang: 'en',
logo: 'https://t1.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://simonsarris.substack.com/p/the-most-precious-resource-is-agency&size=128',
publisher: 'The Map is Mostly Water',
title: 'The Most Precious Resource is Agency',
url: 'https://map.simonsarris.com/p/the-most-precious-resource-is-agency',
Expand Down
Binary file not shown.

0 comments on commit 2fa3670

Please sign in to comment.