diff --git a/README.md b/README.md index 6ac1bfc..4ff7c02 100644 --- a/README.md +++ b/README.md @@ -1,67 +1,72 @@ -# pandoc-doi2cite -This pandoc lua filiter helps users to insert references in a document with using DOI(Digital Object Identifier) tags. -With this filter, users do not need to make bibtex file by themselves. Instead, the filter automatically generate .bib file from the DOI tags, and convert the DOI tags into citation keys available by `pandoc-crossref`. - - - -What the filter do are as follows: - -1. Search citations with DOI tags in the document -2. Get bibtex data of the DOI from http://api.crossref.org -3. Add reference data to a .bib file -4. Replace DOI tags to the correspoinding citation keys. - -# Prerequisites -- Pandoc version 2.0 or newer -- This filter does not need any external dependencies -- This filter must be executed before `pandoc-crossref` or `--citeproc` - -# DOI tags -Following DOI tags can be used: -* @https://doi.org/ -* @doi.org/ -* @DOI: -* @doi: - -The first one (@https://doi.org/) may be the most useful because it is same as the accessible URL. - -# Specify auto-generated bibliography file path -The path of the auto-generated bibliography file can be designated in the document yaml header. -The yaml key is `bib_from_doi`. -Both of the string and array are acceptable(If it is given as an array, only first item will be used). -Note that users typically should add same file path also in `bibliography`, in order to be recognized by `--citeproc`. - -# Example -example paper.md: - -
----
-bibliography:
-  - "doi_refs.bib"
-  - "my_refs.bib"
-bib_from_doi:
-  - "doi_refs.bib"
----
-
-# Introduction
-Electrophoresis is one of the most usable methodologies to separate proteins.[@https://doi.org/10.1038/227680a0]
-By the way, Einstein is genius.[@doi.org/10.1002/andp.19053221004; @doi:10.1002/andp.19053220806]
-
-
-
- -Example command 1 (.md -> .md) - -```sh -pandoc --lua-filter doi2cite.lua --wrap=preserve -s example.md -o ourpur.md -``` - -Example command 2 (.md -> .pdf with [JACS](https://pubs.acs.org/journal/jacsat) style): - -```sh -pandoc --lua-filter doi2cite.lua --filter=pandoc-crossref --citeproc --csl=journal-of-the-american-chemical-society.csl -s example.md -o example.pdf -``` - -Example result - -![4](https://user-images.githubusercontent.com/30950088/117563789-3947be80-b0e3-11eb-974c-565af3902dca.png) +# pandoc-doi2cite +This pandoc Lua filter helps users insert references into a document +using DOI (Digital Object Identifier) tags. With this filter, users +do not need to make a bibtex file by themselves. Instead, the filter +automatically generates a .bib file from the DOI tags, and converts the DOI +tags into citation keys usable by --citeproc. + + + +The filter works as follows: +1. Search citations with DOI tags in the document +2. Search corresponding bibtex data from `__from_DOI.bib` file +3. If not found, get bibtex data of the DOI from + http://api.crossref.org +4. Add reference data to `__from_DOI.bib` file +5. Check duplications of reference keys +6. Replace DOI tags with the corresponding citation keys + +# Prerequisites +- Pandoc version 2.0 or newer +- This filter does not need any external dependencies +- This filter should be executed before `pandoc-crossref` or + `--citeproc` + +# DOI tags +The following DOI tags can be used: +- @https://doi.org/ +- @doi.org/ +- @DOI: +- @doi: + +The first one (@https://doi.org/) may be the most useful because it is +the same as the accessible URL. + +# YAML header +The file **name** of the auto-generated bibliography file **MUST** be +`__from_DOI.bib`, but the **location** of the file can be changed (e.g. +`'./refs/__from_DOI.bib'` or `'refs\\__from_DOI.bib'` for Windows). You +can designate the filepath in the document yaml header. The yaml key +is `bibliography`, which is also used by --citeproc.
+ + +# Example + +example1.md: + + --- + bibliography: + - 'my_refs.bib' + - '__from_DOI.bib' + --- + + # Introduction + The Laemmli system is one of the most widely used gel systems for the separation of proteins.[@LAEMMLI_1970] + By the way, Einstein is genius.[@https://doi.org/10.1002/andp.19053220607; @doi.org/10.1002/andp.19053220806; @doi:10.1002/andp.19053221004] + +Example command 1 (.md -\> .md) + +``` {.sh} +pandoc --lua-filter=doi2cite.lua --wrap=preserve -s example1.md -o expected1.md +``` + +Example command 2 (.md -\> .pdf with +[ACS](https://pubs.acs.org/journal/jacsat) style): + +``` {.sh} +pandoc --lua-filter=doi2cite.lua --filter=pandoc-crossref --citeproc --csl=sample1.csl -s example1.md -o expected1.pdf +``` + +Example result + +![expected1](https://user-images.githubusercontent.com/30950088/119964566-4d952200-bfe4-11eb-90d9-ed2366c639e8.png) diff --git a/doi2cite.lua b/doi2cite.lua index 82c47ad..6ea41b1 100644 --- a/doi2cite.lua +++ b/doi2cite.lua @@ -9,10 +9,17 @@ -- Global variables -- -------------------------------------------------------------------------------- base_url = "http://api.crossref.org" -bibpath = "./bib_from_doi.bib" +mailto = "pandoc.doi2cite@gmail.com" +bibname = "__from_DOI.bib" key_list = {}; doi_key_map = {}; doi_entry_map = {}; +error_strs = {}; +error_strs["Resource not found."] = 404 +error_strs["No acceptable resource available."] = 406 +error_strs["

503 Service Unavailable

\n" + .."No server is available to handle this request.\n" + ..""] = 503 -------------------------------------------------------------------------------- @@ -20,14 +27,10 @@ doi_entry_map = {}; -------------------------------------------------------------------------------- -- Get bibliography filepath from yaml metadata function Meta(m) - local bp = m.bib_from_doi - if bp ~= nil then - if bp[1].text ~= nil then - bibpath = bp[1].text - elseif bp[1][1] ~= nil then - bibpath = bp[1][1].text - else end - end + local bib_data = m.bibliography + local bibpaths = get_paths_from(bib_data) + bibpath = find_filepath(bibname, bibpaths) + bibpath = verify_path(bibpath) local f = io.open(bibpath, "r") if f then entries_str = f:read('*all') @@ -39,8 +42,9 @@ function Meta(m) end end f:close() + else + make_new_file(bibpath) end - print(bibpath .. " is created for bibliography from DOI.") end -- Get bibtex data of doi-based citation.id and make bibliography. @@ -58,18 +62,16 @@ function Cite(c) doi = nil end if doi then - if doi_key_map[doi] ~= nil then - local entry_key = doi_key_map[doi] - citation.id = entry_key - print("Existing DOI: "..doi) + if doi_key_map[doi] then + citation.id = doi_key_map[doi] else local entry_str = get_bibentry(doi) - if entry_str == nil or entry_str == "Resource not found." then + if entry_str == nil or error_strs[entry_str] then print("Failed to get ref from DOI: " .. doi) else - entry_str = replace_symbols(entry_str) + entry_str = tex2raw(entry_str) local entry_key = get_entrykey(entry_str) - if key_list[entry_key] ~= nil then + if key_list[entry_key] then entry_key = entry_key.."_"..doi entry_str = replace_entrykey(entry_str, entry_key) end @@ -81,7 +83,7 @@ function Cite(c) f:write(entry_str .. "\n") f:close() else - error("Can not open file: "..bibpath) + error("Unable to open file: "..bibpath) end end end @@ -99,21 +101,71 @@ function get_bibentry(doi) local entry_str = doi_entry_map[doi] if entry_str == nil then print("Request DOI: " .. 
doi) - local url = base_url.."/works/"..doi.."/transform/application/x-bibtex" + local url = base_url.."/works/" + ..doi.."/transform/application/x-bibtex" + .."?mailto="..mailto mt, entry_str = pandoc.mediabag.fetch(url) end return entry_str end --- Replace some symbols inorder to escape (maybe) bugs of pandoc/citeproc -function replace_symbols(string) - local buggystrs = {}; - buggystrs["{\textendash}"] = "–" - buggystrs["{\textemdash}"] = "—" - buggystrs["{\textquoteright}"] = "’" - buggystrs["{\textquoteleft}"] = "‘" - for buggystr, altanative in pairs(buggystrs) do - local string = string:gsub(buggystr, altanative) +-- Collect filepaths (as set keys) from metadata holding one entry or a list of entries; non-table or empty metadata yields an empty set +function get_paths_from(metadata) + local filepaths = {}; + if type(metadata) == "table" then + if metadata[1] and metadata[1].text then + filepaths[metadata[1].text] = true + else + for _, datum in pairs(metadata) do + if datum[1] then + if datum[1].text then + filepaths[datum[1].text] = true + end + end + end + end + end + return filepaths +end + +-- Split a path at its last "/" (or "\" when it has no "/") into {filename, dirname}; dirname is nil without a delimiter +function split_path(filepath) + local delim = nil + local len = filepath:len() + local reversed = filepath:reverse() + if filepath:find("/") then + delim = "/" + elseif filepath:find([[\]]) then + delim = [[\]] + else + return {filename = filepath, dirname = nil} + end + local pos = reversed:find(delim) + local dirname = filepath:sub(1, len - pos) + local filename = reversed:sub(1, pos - 1):reverse() + return {filename = filename, dirname = dirname} +end + +-- Return the first path in the filepaths set whose file name equals the given filename, or nil if none matches +function find_filepath(filename, filepaths) + for path, _ in pairs(filepaths) do + local name = split_path(path)["filename"] + if name == filename then + return path + end + end + return nil +end + +-- Replace some TeX macros with the Unicode characters citeproc can process (backslashes are escaped so "\t" is not read as a tab) +function tex2raw(string) + local symbols = {}; + symbols["{\\textendash}"] = "–" +
symbols["{\\textemdash}"] = "—" + symbols["{\\textquoteright}"] = "’" + symbols["{\\textquoteleft}"] = "‘" + for tex, raw in pairs(symbols) do + string = string:gsub(tex, raw) end return string end @@ -157,6 +209,39 @@ function get_doi_key_map(bibtex_string) return keys end +-- Create an empty file at filepath, first creating its parent directory via the shell when the path has one +function make_new_file(filepath) + if filepath then + print("doi2cite: creating "..filepath) + local dirname = split_path(filepath)["dirname"] + if dirname then + os.execute('mkdir -p "'..dirname..'"') + end + local f = io.open(filepath, "w") + if f then + f:close() + else + error("Unable to make bibtex file: "..filepath..".\n" + .."This error may come from the missing directory. \n" + ) + end + end +end + +-- Verify that the given filepath is correct. +-- Catch common Pandoc user mistakes about Windows-formatted filepath. +function verify_path(bibpath) + if bibpath == nil then + print("[WARNING] doi2cite: " + .."The given file path is incorrect or empty. " + .."In Windows-formatted filepath, Pandoc recognizes " + .."double backslash ("..[[\\]]..") as the delimiters." + ) + return "__from_DOI.bib" + else + return bibpath + end +end -------------------------------------------------------------------------------- -- The main function -- @@ -164,4 +249,4 @@ end return { { Meta = Meta }, { Cite = Cite } -} \ No newline at end of file +} diff --git a/sample1.md b/sample1.md new file mode 100644 index 0000000..1d29c2b --- /dev/null +++ b/sample1.md @@ -0,0 +1,9 @@ +--- +bibliography: + - 'my_refs.bib' + - '__from_DOI.bib' +--- + +# Introduction +The Laemmli system is one of the most widely used gel systems for the separation of proteins.[@LAEMMLI_1970] +By the way, Einstein is genius.[@https://doi.org/10.1002/andp.19053220607; @doi.org/10.1002/andp.19053220806; @doi:10.1002/andp.19053221004] \ No newline at end of file