Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle delete markers for objects in ES #1997

Merged
merged 6 commits into from
Jan 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion catalog/app/components/SearchResults/SearchResults.js
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,11 @@ function VersionInfo({ bucket, path, version, versions }) {
{' from '}
<Bold>{v.updated.toLocaleString()}</Bold>
{' | '}
<Bold>{readableBytes(v.size)}</Bold>
{v.deleteMarker ? (
<Bold>Delete Marker</Bold>
) : (
<Bold>{readableBytes(v.size)}</Bold>
)}
</M.Typography>
))}
</SmallerSection>
Expand Down
149 changes: 71 additions & 78 deletions catalog/app/containers/Bucket/Overview.js
Original file line number Diff line number Diff line change
Expand Up @@ -757,89 +757,82 @@ function Head({ req, s3, overviewUrl, bucket, description }) {
const classes = useHeadStyles()
const isRODA = !!overviewUrl && overviewUrl.includes(`/${RODA_BUCKET}/`)
const colorPool = useConst(() => mkKeyedPool(COLOR_MAP))
const statsData = useData(requests.bucketStats, { req, s3, bucket, overviewUrl })
const pkgStatsData = useData(requests.bucketPkgStats, { req, bucket })
return (
<Data fetch={requests.bucketStats} params={{ req, s3, bucket, overviewUrl }}>
nl0 marked this conversation as resolved.
Show resolved Hide resolved
{(res) => (
<M.Paper className={classes.root}>
<M.Box className={classes.top}>
<M.Typography variant="h5">{bucket}</M.Typography>
{!!description && (
<M.Box mt={1}>
<M.Typography variant="body1">{description}</M.Typography>
</M.Box>
)}
{isRODA && (
<M.Box
mt={1}
position={{ md: 'absolute' }}
right={{ md: 32 }}
bottom={{ md: 31 }}
color="grey.300"
textAlign={{ md: 'right' }}
>
<M.Typography variant="body2">
From the{' '}
<M.Link href={RODA_LINK} color="inherit" underline="always">
Registry of Open Data on AWS
</M.Link>
</M.Typography>
</M.Box>
)}
<M.Box mt={{ xs: 2, sm: 3 }} display="flex" alignItems="baseline">
<StatDisplay
value={AsyncResult.prop('totalBytes', res)}
format={readableBytes}
fallback={() => '? B'}
/>
<StatDisplay
value={AsyncResult.prop('totalObjects', res)}
format={readableQuantity}
label="Objects"
fallback={() => '?'}
/>
<StatDisplay
value={AsyncResult.prop('totalPackages', res)}
format={formatQuantity}
label="Packages"
fallback={() => null}
/>
</M.Box>
<M.Paper className={classes.root}>
<M.Box className={classes.top}>
<M.Typography variant="h5">{bucket}</M.Typography>
{!!description && (
<M.Box mt={1}>
<M.Typography variant="body1">{description}</M.Typography>
</M.Box>
)}
{isRODA && (
<M.Box
p={{ xs: 2, sm: 4 }}
display="flex"
flexDirection={{ xs: 'column', md: 'row' }}
alignItems={{ md: 'flex-start' }}
position="relative"
mt={1}
position={{ md: 'absolute' }}
right={{ md: 32 }}
bottom={{ md: 31 }}
color="grey.300"
textAlign={{ md: 'right' }}
nl0 marked this conversation as resolved.
Show resolved Hide resolved
>
<ObjectsByExt
data={AsyncResult.prop('exts', res)}
width="100%"
flexShrink={1}
colorPool={colorPool}
/>
<M.Box
display="flex"
flexDirection="column"
justifyContent="center"
flexShrink={0}
height={{ xs: 32, md: '100%' }}
width={{ xs: '100%', md: 32 }}
>
<M.Hidden mdUp>
<M.Divider />
</M.Hidden>
</M.Box>
<Downloads
bucket={bucket}
colorPool={colorPool}
width="100%"
flexShrink={1}
/>
<M.Typography variant="body2">
From the{' '}
<M.Link href={RODA_LINK} color="inherit" underline="always">
Registry of Open Data on AWS
</M.Link>
</M.Typography>
</M.Box>
</M.Paper>
)}
</Data>
)}
<M.Box mt={{ xs: 2, sm: 3 }} display="flex" alignItems="baseline">
<StatDisplay
value={AsyncResult.prop('totalBytes', statsData.result)}
format={readableBytes}
fallback={() => '? B'}
/>
<StatDisplay
value={AsyncResult.prop('totalObjects', statsData.result)}
format={readableQuantity}
label="Objects"
fallback={() => '?'}
/>
<StatDisplay
value={AsyncResult.prop('totalPackages', pkgStatsData.result)}
format={formatQuantity}
label="Packages"
fallback={() => null}
/>
</M.Box>
</M.Box>
<M.Box
p={{ xs: 2, sm: 4 }}
display="flex"
flexDirection={{ xs: 'column', md: 'row' }}
alignItems={{ md: 'flex-start' }}
position="relative"
>
<ObjectsByExt
data={AsyncResult.prop('exts', statsData.result)}
width="100%"
flexShrink={1}
colorPool={colorPool}
/>
<M.Box
display="flex"
flexDirection="column"
justifyContent="center"
flexShrink={0}
height={{ xs: 32, md: '100%' }}
width={{ xs: '100%', md: 32 }}
>
<M.Hidden mdUp>
<M.Divider />
</M.Hidden>
</M.Box>
<Downloads bucket={bucket} colorPool={colorPool} width="100%" flexShrink={1} />
</M.Box>
</M.Paper>
)
}

Expand Down
29 changes: 23 additions & 6 deletions catalog/app/containers/Bucket/requests.js
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,6 @@ const processStats = R.applySpec({
),
totalObjects: R.path(['hits', 'total']),
totalBytes: R.path(['aggregations', 'totalBytes', 'value']),
totalPackages: R.path(['aggregations', 'totalPackageHandles', 'value']),
})

export const bucketStats = async ({ req, s3, bucket, overviewUrl }) => {
Expand All @@ -204,9 +203,7 @@ export const bucketStats = async ({ req, s3, bucket, overviewUrl }) => {
}

try {
return await req('/search', { index: `${bucket}*`, action: 'stats' }).then(
processStats,
)
return await req('/search', { index: bucket, action: 'stats' }).then(processStats)
} catch (e) {
// eslint-disable-next-line no-console
console.log('Unable to fetch live stats:')
Expand All @@ -217,6 +214,24 @@ export const bucketStats = async ({ req, s3, bucket, overviewUrl }) => {
throw new Error('Stats unavailable')
}

/**
 * Fetch package statistics for a bucket from the search endpoint.
 *
 * Calls `req('/search', ...)` against the `${bucket}_packages` index with the
 * `stats` action and picks `aggregations.totalPackageHandles.value` out of the
 * response.
 *
 * @param {Object} params
 * @param {Function} params.req - request function, invoked as `req(endpoint, query)`.
 * @param {string} params.bucket - bucket name used to build the index name.
 * @returns {Promise<Object>} object of the shape `{ totalPackages }`.
 * @throws {Error} 'Package stats unavailable' when the request or the
 *   response processing fails; the underlying error is logged to the console.
 */
export const bucketPkgStats = async ({ req, bucket }) => {
  try {
    // TODO: use pkg_stats action when it's implemented
    const res = await req('/search', { index: `${bucket}_packages`, action: 'stats' })
    return R.applySpec({
      totalPackages: R.path(['aggregations', 'totalPackageHandles', 'value']),
    })(res)
  } catch (e) {
    // The original failure is only logged; callers get a generic error below.
    // eslint-disable-next-line no-console
    console.log('Unable to fetch package stats:')
    // eslint-disable-next-line no-console
    console.error(e)
  }

  throw new Error('Package stats unavailable')
}

const fetchFileVersioned = async ({ s3, bucket, path, version }) => {
const versionExists = await ensureObjectIsPresent({
s3,
Expand Down Expand Up @@ -432,7 +447,8 @@ export const bucketSummary = async ({ s3, req, bucket, overviewUrl, inStack }) =
// eslint-disable-next-line no-underscore-dangle
const s = (h.inner_hits.latest.hits.hits[0] || {})._source
return (
s && {
s &&
!s.delete_marker && {
bucket,
key: s.key,
version: s.version_id,
Expand Down Expand Up @@ -533,7 +549,8 @@ export const bucketImgs = async ({ req, s3, bucket, overviewUrl, inStack }) => {
// eslint-disable-next-line no-underscore-dangle
const s = (h.inner_hits.latest.hits.hits[0] || {})._source
return (
s && {
s &&
!s.delete_marker && {
bucket,
key: s.key,
version: s.version_id,
Expand Down
1 change: 1 addition & 0 deletions catalog/app/utils/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ const extractObjData = ({ bucket, score, src }) => {
lastModified: parseDate(src.last_modified),
size: src.size,
meta: src.user_meta,
deleteMarker: src.delete_marker,
},
],
},
Expand Down
22 changes: 17 additions & 5 deletions lambdas/search/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,19 +103,31 @@ def lambda_handler(request):
}
}
_source = user_source or [
'key', 'version_id', 'updated', 'last_modified', 'size', 'user_meta',
'comment', 'handle', 'hash', 'tags', 'metadata', 'pointer_file'
'key',
'version_id',
'updated',
'last_modified',
'size',
'user_meta',
'comment',
'handle',
'hash',
'tags',
'metadata',
'pointer_file',
'delete_marker',
]
size = DEFAULT_SIZE
elif action == 'stats':
body = {
"query": {"match_all": {}},
"query": {"term": {"delete_marker": False}},
"aggs": {
"totalBytes": {"sum": {"field": 'size'}},
"exts": {
"terms": {"field": 'ext'},
"aggs": {"size": {"sum": {"field": 'size'}}},
},
# TODO: move this to a separate action (pkg_stats)
"totalPackageHandles": {"value_count": {"field": "handle"}},
}
}
Expand All @@ -132,7 +144,7 @@ def lambda_handler(request):
'name': 'latest',
'size': 1,
'sort': [{'last_modified': 'desc'}],
'_source': ['key', 'version_id'],
'_source': ['key', 'version_id', 'delete_marker'],
},
},
}
Expand All @@ -155,7 +167,7 @@ def lambda_handler(request):
'name': 'latest',
'size': 1,
'sort': [{'last_modified': 'desc'}],
'_source': ['key', 'version_id'],
'_source': ['key', 'version_id', 'delete_marker'],
},
},
}
Expand Down
37 changes: 30 additions & 7 deletions lambdas/search/tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,19 @@ def test_search(self):
'size': 1000,
'from': 0,
'_source': ','.join([
'key', 'version_id', 'updated', 'last_modified', 'size', 'user_meta',
'comment', 'handle', 'hash', 'tags', 'metadata', 'pointer_file'
'key',
'version_id',
'updated',
'last_modified',
'size',
'user_meta',
'comment',
'handle',
'hash',
'tags',
'metadata',
'pointer_file',
'delete_marker',
]),
'terminate_after': 538,
})
Expand Down Expand Up @@ -342,8 +353,19 @@ def test_search_retry(self):
'size': 1000,
'from': 0,
'_source': ','.join([
'key', 'version_id', 'updated', 'last_modified', 'size', 'user_meta',
'comment', 'handle', 'hash', 'tags', 'metadata', 'pointer_file'
'key',
'version_id',
'updated',
'last_modified',
'size',
'user_meta',
'comment',
'handle',
'hash',
'tags',
'metadata',
'pointer_file',
'delete_marker',
]),
'terminate_after': 10000,
})
Expand Down Expand Up @@ -393,13 +415,14 @@ def test_stats(self):
def _callback(request):
payload = json.loads(request.body)
assert payload == {
"query": {"match_all": {}},
"query": {"term": {"delete_marker": False}},
"aggs": {
"totalBytes": {"sum": {"field": "size"}},
"totalBytes": {"sum": {"field": 'size'}},
"exts": {
"terms": {"field": 'ext'},
"aggs": {"size": {"sum": {"field": "size"}}},
"aggs": {"size": {"sum": {"field": 'size'}}},
},
# TODO: move this to a separate action (pkg_stats)
"totalPackageHandles": {"value_count": {"field": "handle"}},
}
}
Expand Down