Skip to content

Commit

Permalink
Move generate script language and year config to JSON (#867)
Browse files Browse the repository at this point in the history
* Improve Ebook generation

* Generalise generate chapters for multi-years

* Make ebook generation dynamic

* Code cleanup
  • Loading branch information
tunetheweb authored Jun 3, 2020
1 parent 7b55230 commit 21201ff
Show file tree
Hide file tree
Showing 13 changed files with 170 additions and 70 deletions.
7 changes: 2 additions & 5 deletions src/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,9 @@ npm run generate
4. To actually generate the ebooks, start your local server, then run the following:

```
npm run ebook_2019_en
npm run ebook_2019_ja
npm run ebooks
```

(TODO: make this a script to handle all languages and years at some point)

It is also possible to generate the ebook from the website, with some optional params (e.g. to print it!)

```
Expand All @@ -79,7 +76,7 @@ Note `--pdf-profile='PDF/UA-1'` may not be needed if just intend to print.

Params accepted are:

- print - this ads left, right pages, footnotes, and sets roman numerals for front matter page numbers and adds footnotes. It is used by default when running `npm run ebook_2019_en` but we could change that if prefer a less print-like ebook.
- print - this adds left and right pages and footnotes, and sets roman numerals for front matter page numbers. It is used by default when running `npm run ebooks`, but we could change that if we prefer a less print-like ebook.
- page-size - this allows you to override the default page size of A4
- inside-margin - this allows you to set an inside margin for binding (e.g. on right for left hand pages and vice versa)

Expand Down
2 changes: 1 addition & 1 deletion src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def get_languages(json_config):

def get_live(json_config):
is_live = False
data = get_entries_from_json(json_config,'settings','isLive')
data = get_entries_from_json(json_config,'settings','is_live')
for list in data:
if list == True:
is_live = True
Expand Down
5 changes: 3 additions & 2 deletions src/config/2019.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
{
"settings": [
{
"isLive": true,
"supported_languages": ["en","es","fr","ja"]
"is_live": true,
"supported_languages": ["en","es","fr","ja"],
"ebook_languages": ["en","ja"]
}
],
"outline": [
Expand Down
3 changes: 1 addition & 2 deletions src/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
"homepage": "https://github.com/HTTPArchive/almanac.httparchive.org#readme",
"scripts": {
"generate": "node ./tools/generate",
"ebook_2019_en": "prince http://127.0.0.1:8080/en/2019/ebook?print -o static/pdfs/web_almanac_2019_en.pdf --pdf-profile='PDF/UA-1'",
"ebook_2019_ja": "prince http://127.0.0.1:8080/ja/2019/ebook?print -o static/pdfs/web_almanac_2019_ja.pdf --pdf-profile='PDF/UA-1'",
"ebooks": "node ./tools/generate/generate_ebook_pdfs",
"deploy": "echo \"Y\" | gcloud app deploy --project webalmanac --stop-previous-version"
},
"devDependencies": {
Expand Down
40 changes: 20 additions & 20 deletions src/templates/en/2019/ebook.html

Large diffs are not rendered by default.

40 changes: 20 additions & 20 deletions src/templates/ja/2019/ebook.html

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions src/templates/sitemap.xml
Original file line number Diff line number Diff line change
Expand Up @@ -350,5 +350,15 @@
<loc>https://almanac.httparchive.org/ja/accessibility-statement</loc>
<lastmod>2020-06-03</lastmod>
</url>

<url>
<loc>https://almanac.httparchive.org/static/pdfs/web_almanac_2019_en.pdf</loc>
<lastmod>2020-05-20</lastmod>
</url>

<url>
<loc>https://almanac.httparchive.org/static/pdfs/web_almanac_2019_ja.pdf</loc>
<lastmod>2020-05-20</lastmod>
</url>

</urlset>
27 changes: 19 additions & 8 deletions src/tools/generate/generate_chapters.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,7 @@ const showdown = require('showdown');
const ejs = require('ejs');
const prettier = require('prettier');

//Chapters may exist but not be ready to be launched so do not include in sitemap
const sitemap_languages = ['en','es','fr','ja'];

const { find_files, size_of, parse_array } = require('./shared');
const { find_markdown_files, find_config_files, size_of, parse_array } = require('./shared');
const { generate_table_of_contents } = require('./generate_table_of_contents');
const { generate_header_links } = require('./generate_header_links');
const { generate_figure_ids } = require('./generate_figure_ids');
Expand All @@ -23,10 +20,24 @@ converter.setOption('tablesHeaderId', false);
converter.setOption('ghMentions', false);

const generate_chapters = async () => {

let sitemap = [];
let sitemap_languages = {};
let ebook_chapters = [];
let configs = {};

for (const config_file of await find_config_files()) {
const re = (process.platform != 'win32')
? /config\/([0-9]*).json/
: /config\\([0-9]*).json/;
const [path,year] = config_file.match(re);

configs[year] = JSON.parse(await fs.readFile(`config/${year}.json`, 'utf8'));
sitemap_languages[year] = configs[year].settings[0].supported_languages

for (const file of await find_files()) {
}

for (const file of await find_markdown_files()) {
const re = (process.platform != 'win32')
? /content\/(.*)\/(.*)\/(.*).md/
: /content\\(.*)\\(.*)\\(.*).md/;
Expand All @@ -37,7 +48,7 @@ const generate_chapters = async () => {

const markdown = await fs.readFile(file, 'utf-8');
const { metadata, body, toc } = await parse_file(markdown,chapter);
if ( sitemap_languages.includes(language) ) {
if ( sitemap_languages[year].includes(language) ) {
sitemap.push({ language, year, chapter, metadata });
}
ebook_chapters.push({ language, year, chapter, metadata, body, toc });
Expand All @@ -49,10 +60,10 @@ const generate_chapters = async () => {
}
}

await generate_ebooks(ebook_chapters,configs);

const sitemap_path = await generate_sitemap(sitemap);
await size_of(sitemap_path);

await generate_ebooks(ebook_chapters);
};

const parse_file = async (markdown,chapter) => {
Expand Down
49 changes: 49 additions & 0 deletions src/tools/generate/generate_ebook_pdfs.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
const fs = require('fs-extra');
const { exec } = require("child_process");

const { find_config_files } = require('./shared');

/**
 * Generate a PDF for every ebook configured in the `config/<year>.json` files.
 *
 * Each config's `settings[0].ebook_languages` lists the languages to render
 * for that year. Every year/language pair is rendered by running Prince
 * against the ebook page served at http://127.0.0.1:8080 and is written to
 * `static/pdfs/web_almanac_<year>_<language>.pdf`.
 *
 * A failing Prince run is logged and does not stop the remaining ebooks.
 *
 * @returns {Promise<void>} Resolves once every Prince run has finished.
 */
const generate_ebook_pdfs = async () => {

  const ebook_languages = {};

  // Accept either path separator so one pattern works on every platform.
  const config_re = /config[\\/]([0-9]+)\.json$/;

  // Run a shell command and wait for it to exit. Errors are logged rather
  // than thrown so that one failed ebook does not abort the others.
  const run_command = (command) => new Promise((resolve) => {
    exec(command, (err, stdout, stderr) => {
      if (err) {
        //some err occurred
        console.error(err);
      } else {
        // the *entire* stdout and stderr (buffered)
        console.log(`stdout: ${stdout}`);
        console.log(`stderr: ${stderr}`);
      }
      resolve();
    });
  });

  // Read all the config files
  for (const config_file of await find_config_files()) {
    const match = config_file.match(config_re);
    if (!match) {
      // Not a <year>.json config, so there is nothing to generate from it.
      console.warn('Skipping unrecognised config file', config_file);
      continue;
    }
    const year = match[1];

    const config = JSON.parse(await fs.readFile(`config/${year}.json`, 'utf8'));
    // A year without an ebook_languages setting simply has no ebooks.
    ebook_languages[year] = config.settings[0].ebook_languages || [];
  }

  //Generate all the configured ebook pdfs
  for (const [year, languages] of Object.entries(ebook_languages)) {
    console.log('Ebooks configured for',year, ':',languages);
    for (const language of languages) {
      console.log('Generating ebook for',year,language);
      const command = `prince http://127.0.0.1:8080/${language}/${year}/ebook?print -o static/pdfs/web_almanac_${year}_${language}.pdf --pdf-profile='PDF/UA-1'`;
      // Awaited one at a time so each run's output follows its own
      // "Generating ebook for" line and the returned promise only settles
      // when the PDFs have actually been attempted.
      await run_command(command);
    }
  }
};

// Script entry point: generate every configured ebook PDF.
(async () => {
  try {
    // Can uncomment this to get latest timestamps from origin:master
    // let { generate_last_updated } = require('./generate_last_updated');
    // await generate_last_updated();

    await generate_ebook_pdfs();
  } catch (err) {
    // Without a handler a failure here would only surface as an unhandled
    // promise rejection; log it and make the script exit with a failure code.
    console.error(err);
    process.exitCode = 1;
  }
})();
17 changes: 9 additions & 8 deletions src/tools/generate/generate_ebooks.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,6 @@ const prettier = require('prettier');

const { size_of } = require('./shared');

// TODO: Make this more dynamic.
const ebooks_to_generate = {
'2019': ['en','ja']
};

const update_links = (chapter) => {
let body = chapter.body;
// Replace current chapter links to full anchor link (e.g. #introduction -> #javascript-introduction)
Expand All @@ -34,9 +29,15 @@ const update_links = (chapter) => {
return body;
}

const generate_ebooks = async (ebook_chapters) => {
for (let [year, languages] of Object.entries(ebooks_to_generate)) {
let config = JSON.parse(await fs.readFile(`config/${year}.json`, 'utf8'));
const generate_ebooks = async (ebook_chapters,configs) => {

// Get distinct years
const years = [...new Set(ebook_chapters.map((x) => `${x.year}`))];

for (const year of years) {

const config = configs[year];
const languages = config.settings[0].ebook_languages;

for (let language of languages) {
let ebook = { language, config, toc: [], parts: [] };
Expand Down
4 changes: 2 additions & 2 deletions src/tools/generate/generate_last_updated.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
// Against git if you want to.
const fs = require('fs-extra');
const { execSync } = require('child_process');
const { find_files } = require('./shared');
const { find_markdown_files } = require('./shared');
const { find_template_files } = require('./shared');

const generate_last_updated = async () => {
Expand All @@ -20,7 +20,7 @@ const generate_last_updated = async () => {
return
}

for (const file of await find_files()) {
for (const file of await find_markdown_files()) {
console.log(`\n Setting the last_updated field on ${file}`);

// Fetch the last modified date, according to the git log.
Expand Down
21 changes: 21 additions & 0 deletions src/tools/generate/generate_sitemap.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ const static_pages = [
'contributors.html',
'accessibility_statement.html'
];
const ebook_path = "static/pdfs/web_almanac_";

const generate_sitemap = async (sitemap_chapters) => {

Expand Down Expand Up @@ -39,6 +40,7 @@ const generate_sitemap = async (sitemap_chapters) => {
};

const get_static_pages = async (sitemap_chapters) => {

// Get distinct languages and years
const languages_and_years = [...new Set(sitemap_chapters.map((x) => `${x.language}/${x.year}`))];

Expand All @@ -61,6 +63,25 @@ const get_static_pages = async (sitemap_chapters) => {
}
}

// For ebooks find out if the PDF exists, get lastmod from template
const years = [...new Set(sitemap_chapters.map((x) => `${x.year}`))];
const languages = [...new Set(sitemap_chapters.map((x) => `${x.language}`))];
for (const year of years) {
for (const language of languages) {
const ebook_pdf = ebook_path + year + '_' + language + '.pdf';
const ebook_html = 'templates/' + language + '/' + year + '/ebook.html';
if (fs.existsSync(ebook_pdf)) {
if (fs.existsSync(ebook_html)) {
const file = await fs.readFile(ebook_html, 'utf-8');
const match = file.match(/"last_updated":"([0-9\-\+\:T]*)/);
const lastmod = set_min_date(match[1]);
const url = ebook_pdf;
urls.push({ url, lastmod });
}
}
}
}

return urls;
};

Expand Down
15 changes: 13 additions & 2 deletions src/tools/generate/shared.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ const find_template_files = async () => {
return await recursive('templates', [filter]);
};

const find_files = async () => {
const find_markdown_files = async () => {
const filter = (file, stats) => {
const isMd = file && file.endsWith('.md');
const isDirectory = stats && stats.isDirectory();
Expand All @@ -23,6 +23,16 @@ const find_files = async () => {
return await recursive('content', [filter]);
};

/**
 * Collect the `.json` entries found under the `config` directory.
 *
 * @returns {Promise<*>} Whatever `recursive` resolves to for `config`
 *   (presumably an array of file paths - confirm against its docs).
 */
const find_config_files = async () => {
  // Callback handed to `recursive`; a true result is assumed to mean
  // "ignore this entry" (recursive-readdir semantics - TODO confirm), so
  // anything whose path does not end in `.json` is left out.
  const ignore_non_json = (entry, stats) => {
    if (entry && entry.endsWith('.json')) {
      return false;
    }
    return true;
  };

  const config_files = await recursive('config', [ignore_non_json]);
  return config_files;
};

const size_of = async (path) => {
let b = (await fs.stat(path)).size;

Expand All @@ -45,8 +55,9 @@ const parse_array = (array_as_string) => {
};

module.exports = {
find_files,
find_markdown_files,
find_template_files,
find_config_files,
size_of,
parse_array
};

0 comments on commit 21201ff

Please sign in to comment.