Skip to content

Commit

Permalink
cdxj compatibility:
Browse files Browse the repository at this point in the history
- output 'status', 'offset', 'length' as strings in cdxj
- properly read 'content-type' for non-response records
  • Loading branch information
ikreymer committed Aug 23, 2024
1 parent 53997e6 commit ccdd4da
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions src/lib/indexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,15 @@ export class CDXIndexer extends Indexer {
delete result["urlkey"];
delete result["timestamp"];

return `${urlkey} ${timestamp} ${JSON.stringify(result)}\n`;
/**
 * JSON.stringify replacer that writes the "offset", "length" and "status"
 * properties as strings. A null or undefined value for one of those keys
 * becomes the empty string; every other key is passed through untouched.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const replacer = (key: string, value: any): any => {
  switch (key) {
    case "offset":
    case "length":
    case "status":
      // Missing values are emitted as "" rather than null / being dropped.
      if (value === null || value === undefined) {
        return "";
      }
      // String concatenation coerces numbers (and anything else) to a string.
      return "" + value;

    default:
      return value;
  }
};

return `${urlkey} ${timestamp} ${JSON.stringify(result, replacer)}\n`;
}

// eslint-disable-next-line @typescript-eslint/no-explicit-any
Expand Down Expand Up @@ -375,7 +383,7 @@ export class CDXIndexer extends Indexer {
break;

default:
field = "content-type";
return record.warcContentType;
}
value = super.getField(field, record);
return value ? value.toString().split(";", 1)[0]?.trim() : null;
Expand Down

0 comments on commit ccdd4da

Please sign in to comment.