-
Notifications
You must be signed in to change notification settings - Fork 0
/
convert_kindle_clippings.exs
executable file
·159 lines (128 loc) · 3.9 KB
/
convert_kindle_clippings.exs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#! /usr/bin/env elixir
defmodule KindleClipps do
  @moduledoc """
  `KindleClipps` is a module for parsing Kindle clippings.

  It reads a clippings file, turns every highlight into a map, drops
  entries that are not highlights with a location range, and writes one
  markdown file per book title.
  """

  @typedoc """
  A parsed highlight.

  The author is stored under the `:autor` key (spelling kept for
  compatibility with existing callers).
  """
  @type clip :: %{
          title: String.t(),
          autor: String.t(),
          location_start: integer() | nil,
          location_end: integer() | nil,
          text: String.t()
        }

  # Matches a location range such as "Location 10-12" (case-insensitive).
  # A single location without a range does not match on purpose: such
  # entries end up with a nil location and are filtered out by `load_clips/1`.
  defp location_regex, do: ~r/.*Location (\d+)-(\d+).*/i

  @doc """
  Parse a Kindle clipping and create a map with the following keys:

  - `:title` - the title of the book (text before the first parenthesis group)
  - `:autor` - the author taken from the last parenthesis group, or `""`
  - `:location_start` - first location of the clip, or `nil` if not a range
  - `:location_end` - last location of the clip, or `nil` if not a range
  - `:text` - the text of the clip

  Lines may be separated by CRLF or LF. Blank lines are skipped and a
  leading byte order mark is removed. If the clipping does not consist of
  exactly three non-empty lines (title, location line, text) an empty map
  is returned.
  """
  @spec parse_clip(String.t()) :: clip() | %{}
  def parse_clip(txt) when is_binary(txt) do
    txt
    |> String.split(["\r\n", "\n"], trim: true)
    |> Enum.map(&clean_line/1)
    |> build_clip()
  end

  # Strips surrounding whitespace and a leading byte order mark from a line.
  defp clean_line(line) do
    line
    |> String.trim()
    |> String.trim_leading("\uFEFF")
  end

  # Builds the clip map from the cleaned lines of one clipping.
  defp build_clip([title, loc_and_added, text]) do
    {loc_start, loc_end} = parse_location(loc_and_added)

    %{
      title: parse_title(title),
      autor: parse_autor(title),
      location_start: loc_start,
      location_end: loc_end,
      text: text
    }
  end

  # bookmarks and notes are ignored
  defp build_clip(_lines), do: %{}

  @doc """
  Parse the location range from the second line of a Kindle clip.

  Returns `{location_start, location_end}` as integers, or `{nil, nil}`
  when the line contains no `Location <start>-<end>` range.
  """
  @spec parse_location(String.t()) :: {integer(), integer()} | {nil, nil}
  def parse_location(loc_and_added) do
    case Regex.run(location_regex(), loc_and_added) do
      [_, loc_start, loc_end] ->
        {String.to_integer(loc_start), String.to_integer(loc_end)}

      _ ->
        {nil, nil}
    end
  end

  @doc """
  Return the book title: everything before the first `" ("` of the title line.
  """
  @spec parse_title(String.t()) :: String.t()
  def parse_title(title) do
    title
    |> String.split(" (", parts: 2)
    |> hd()
  end

  @doc """
  Return the author: the content of the last `" (...)"` group of the title line.

  Returns `""` when the title line has no parenthesis group at all, instead
  of mistaking the title itself for the author.
  """
  @spec parse_autor(String.t()) :: String.t()
  def parse_autor(title) do
    case String.split(title, " (") do
      [_title_only] ->
        ""

      parts ->
        parts
        |> List.last()
        |> String.trim_trailing(")")
    end
  end

  @doc """
  Load a file with Kindle clippings and parse each clip.

  Clips without text or without a location range are removed. Duplicates
  are removed per book: of several clips with the same title and the same
  start location only the first one in the file is kept.

  Raises `File.Error` if the file cannot be read.
  """
  @spec load_clips(Path.t()) :: [clip()]
  def load_clips(file) do
    file
    |> File.read!()
    |> String.split("==========")
    |> Enum.map(&parse_clip/1)
    |> Enum.filter(&valid_clip?/1)
    # The title is part of the key so that two books which both have a
    # highlight starting at the same location do not shadow each other.
    |> Enum.uniq_by(&{&1.title, &1.location_start})
  end

  # A clip is usable when it has both a text and a start location.
  defp valid_clip?(%{text: text, location_start: location_start}) do
    not is_nil(text) and not is_nil(location_start)
  end

  defp valid_clip?(_clip), do: false

  @doc """
  Sort clips by book title and, within a book, by start location.
  """
  @spec sort_clips([clip()]) :: [clip()]
  def sort_clips(clips) do
    Enum.sort_by(clips, &{&1.title, &1.location_start})
  end

  @doc """
  Write clips to one markdown file per book title inside `dir`.

  The file name is the sanitized title plus `.md`. Raises `File.Error` if a
  file cannot be written.
  """
  @spec write_clips([clip()], Path.t()) :: :ok
  def write_clips(clips, dir) do
    clips
    |> Enum.group_by(& &1.title)
    |> Enum.each(fn {title, book_clips} ->
      path = Path.join(dir, SanitizeFilename.sanitize(title) <> ".md")
      File.write!(path, format_md_clips(title, book_clips))
    end)
  end

  @doc """
  Format all clips of one book as a markdown document.

  The document starts with the title as a heading, followed by the author
  of the first clip (or an empty line if there are no clips) and the
  formatted clips separated by blank lines.
  """
  @spec format_md_clips(String.t(), [clip()]) :: String.t()
  def format_md_clips(title, clips) do
    # Always take the first clip so that the output is deterministic.
    author =
      case clips do
        [%{autor: autor} | _] -> autor
        _ -> ""
      end

    "# " <> title <> "\n" <> author <> "\n\n" <> Enum.map_join(clips, "\n\n", &format_md_clip/1)
  end

  @doc """
  Format a single clip as a markdown block quote followed by its location range.
  """
  @spec format_md_clip(map()) :: String.t()
  def format_md_clip(clip) do
    """
    > #{clip[:text]}
    > (Location #{clip[:location_start]}-#{clip[:location_end]})
    """
  end
end
defmodule SanitizeFilename do
  @moduledoc """
  Turns arbitrary text, such as a book title, into a conservative file name.
  """

  @doc """
  Takes a filename, strips surrounding whitespace and normalizes it with `:nfd`.

  After normalization only dots, digits, ASCII letters, underscores and
  whitespace are kept, so accents (which `:nfd` splits off as separate
  combining characters) and all other punctuation are removed. Each run of
  whitespace is then replaced by a single `-`.
  """
  @spec sanitize(String.t()) :: String.t()
  def sanitize(string) do
    string
    |> String.trim()
    |> String.normalize(:nfd)
    # The letter ranges are spelled out separately: a single range from `A`
    # to `z` would also span the ASCII punctuation that sits between `Z`
    # and `a`. The underscore is listed explicitly so it is still kept.
    |> String.replace(~r/[^.0-9A-Za-z_\s]/u, "")
    |> String.replace(~r/[[:space:]]+/u, "-")
  end
end
# CLI interface
#
# Expects exactly two arguments: the clippings file and an existing output
# directory. On success one markdown file per book is written to that
# directory. On any usage error a message is printed to stderr and the
# script stops with exit status 1.
cli_help = "Usage: convert_kindle_clippings.exs <My Clippings.txt> <export_dir>"

# Prints the error followed by the usage line and stops the script.
# `exit({:shutdown, 1})` is used because a plain `exit(:shutdown)` counts as
# a normal exit, which would report success to the calling shell.
abort = fn message ->
  IO.puts(:stderr, "Error: #{message}\n\n#{cli_help}")
  exit({:shutdown, 1})
end

case System.argv() do
  [input_file, dir] ->
    cond do
      not File.dir?(dir) -> abort.("#{dir} is not a directory")
      not File.exists?(input_file) -> abort.("#{input_file} does not exist")
      true -> :ok
    end

    IO.puts("\nLoading clips from #{input_file} and write files per book to #{dir}\n\n")

    clips = KindleClipps.load_clips(input_file)

    clips
    |> KindleClipps.sort_clips()
    |> KindleClipps.write_clips(dir)

    IO.puts("Done. #{length(clips)} clips written to #{dir}")

  _ ->
    abort.("no input file and output dir")
end