forked from brianpetro/obsidian-smart-connections
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.js
3575 lines (3436 loc) · 133 KB
/
main.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
const Obsidian = require("obsidian");
// require built-in crypto module
const crypto = require("crypto");
// Default value for every persisted setting. loadSettings() overlays the object
// returned by loadData() on top of these defaults.
const DEFAULT_SETTINGS = {
// API key entered by the user (saveSettings can re-render the view after it is added)
api_key: "",
// comma-separated substrings; files whose path contains one of them are skipped when embedding
file_exclusions: "",
// comma-separated folder names; a trailing "/" is appended and they are merged into the file exclusions
folder_exclusions: "",
// comma-separated list, split into this.header_exclusions (its consumer is outside this section)
header_exclusions: "",
// comma-separated path matchers; matching files are embedded from their path breadcrumbs only
path_only: "",
show_full_path: false,
expanded_view: true,
group_nearest_by_file: false,
// key used to look up the pronoun list in SELF_REFERENTIAL_PRONOUNS
language: "en",
log_render: false,
log_render_files: false,
skip_sections: false,
// NOTE(review): presumably the chat model name used by the Smart Chat view - not read in this section
smart_chat_model: "gpt-3.5-turbo",
results_count: 30,
// when true, the connections view is opened once the workspace layout is ready
view_open: true,
// plugin version recorded at last start; compared against VERSION in initialize()
version: "",
};
// upper bound (in characters) on the note text used to build the file-level embedding input
const MAX_EMBED_STRING_LENGTH = 25000;
// current plugin version; initialize() stores it in settings.version when it changes
const VERSION = "1.2.8";
// Language-specific self-referential pronouns, keyed by language code.
// loadSettings() reads SELF_REFERENTIAL_PRONOUNS[settings.language] to build its keyword regex.
const SELF_REFERENTIAL_PRONOUNS = {};
// Merge every .json file found in the plugin's "languages" directory into the object above.
// Only .json entries are loaded: require() on anything else in that directory
// (e.g. ".DS_Store" or a sub-directory) would throw while this module is being evaluated
// and prevent the whole plugin from loading.
require("fs")
  .readdirSync(__dirname + "/languages")
  .filter((file) => file.toLowerCase().endsWith(".json"))
  .forEach((file) => {
    // each language file contributes its own top-level keys
    Object.assign(
      SELF_REFERENTIAL_PRONOUNS,
      require(`${__dirname}/languages/${file}`)
    );
  });
class SmartConnectionsPlugin extends Obsidian.Plugin {
// constructor
constructor() {
super(...arguments);
this.api = null;
this.embeddings = null;
this.embeddings_external = null;
this.file_exclusions = [];
this.has_new_embeddings = false;
this.header_exclusions = [];
this.nearest_cache = {};
this.path_only = [];
this.render_log = {};
this.render_log.deleted_embeddings = 0;
this.render_log.exclusions_logs = {};
this.render_log.failed_embeddings = [];
this.render_log.files = [];
this.render_log.new_embeddings = 0;
this.render_log.skipped_low_delta = {};
this.render_log.token_usage = 0;
this.render_log.tokens_saved_by_cache = 0;
this.retry_notice_timeout = null;
this.save_timeout = null;
this.self_ref_kw_regex = null;
}
async loadSettings() {
this.settings = Object.assign({}, DEFAULT_SETTINGS, await this.loadData());
// load file exclusions if not blank
if(this.settings.file_exclusions && this.settings.file_exclusions.length > 0) {
// split file exclusions into array and trim whitespace
this.file_exclusions = this.settings.file_exclusions.split(",").map((file) => {
return file.trim();
});
}
// load folder exclusions if not blank
if(this.settings.folder_exclusions && this.settings.folder_exclusions.length > 0) {
// add slash to end of folder name if not present
const folder_exclusions = this.settings.folder_exclusions.split(",").map((folder) => {
// trim whitespace
folder = folder.trim();
if(folder.slice(-1) !== "/") {
return folder + "/";
} else {
return folder;
}
});
// merge folder exclusions with file exclusions
this.file_exclusions = this.file_exclusions.concat(folder_exclusions);
}
// load header exclusions if not blank
if(this.settings.header_exclusions && this.settings.header_exclusions.length > 0) {
this.header_exclusions = this.settings.header_exclusions.split(",").map((header) => {
return header.trim();
});
}
// load path_only if not blank
if(this.settings.path_only && this.settings.path_only.length > 0) {
this.path_only = this.settings.path_only.split(",").map((path) => {
return path.trim();
});
}
// load self_ref_kw_regex
this.self_ref_kw_regex = new RegExp(`\\b(${SELF_REFERENTIAL_PRONOUNS[this.settings.language].join("|")})\\b`, "gi");
// load failed files
await this.load_failed_files();
}
async saveSettings(rerender=false) {
await this.saveData(this.settings);
// re-load settings into memory
await this.loadSettings();
// re-render view if set to true (for example, after adding API key)
if(rerender) {
this.nearest_cache = {};
await this.make_connections();
}
}
async onload() {
this.addIcon();
await this.loadSettings();
console.log("loading plugin");
this.addCommand({
id: "sc-find-notes",
name: "Find: Make Smart Connections",
icon: "pencil_icon",
hotkeys: [],
// editorCallback: async (editor) => {
editorCallback: async (editor) => {
if(editor.somethingSelected()) {
// get selected text
let selected_text = editor.getSelection();
// render connections from selected text
await this.make_connections(selected_text);
} else {
// clear nearest_cache on manual call to make connections
this.nearest_cache = {};
// console.log("Cleared nearest_cache");
await this.make_connections();
}
}
});
this.addCommand({
id: "smart-connections-view",
name: "Open: View Smart Connections",
callback: () => {
this.open_view();
}
});
// open chat command
this.addCommand({
id: "smart-connections-chat",
name: "Open: Smart Chat Conversation",
callback: () => {
this.open_chat();
}
});
// get all files in vault
this.addSettingTab(new SmartConnectionsSettingsTab(this.app, this));
// register main view type
this.registerView(SMART_CONNECTIONS_VIEW_TYPE, (leaf) => (new SmartConnectionsView(leaf, this)));
// register chat view type
this.registerView(SMART_CONNECTIONS_CHAT_VIEW_TYPE, (leaf) => (new SmartConnectionsChatView(leaf, this)));
// initialize when layout is ready
this.app.workspace.onLayoutReady(this.initialize.bind(this));
/**
* EXPERIMENTAL
* - window-based API access
* - code-block rendering
*/
this.api = new ScSearchApi(this.app, this);
// register API to global window object
(window["SmartSearchApi"] = this.api) && this.register(() => delete window["SmartSearchApi"]);
// code-block renderer
this.registerMarkdownCodeBlockProcessor("smart-connections", this.render_code_block.bind(this));
}
async render_code_block(contents, container, ctx) {
let nearest;
if(contents.trim().length > 0) {
nearest = await this.api.search(contents);
} else {
// use ctx to get file
console.log(ctx);
const file = this.app.vault.getAbstractFileByPath(ctx.sourcePath);
nearest = await this.find_note_connections(file);
}
if (nearest.length) {
this.update_results(container, nearest);
// const list = container.createEl("ul");
// list.addClass("smart-connections-list");
// for (const item of nearest) {
// const el = list.createEl("li", {
// cls: "smart-connections-item",
// text: item.link
// });
// }
}
}
async make_connections(selected_text=null) {
let view = this.get_view();
if (!view) {
// open view if not open
await this.open_view();
view = this.get_view();
}
await view.render_connections(selected_text);
}
async initialize() {
// if this settings.view_open is true, open view on startup
if(this.settings.view_open) {
this.open_view();
}
// on new version
if(this.settings.version !== VERSION) {
// update version
this.settings.version = VERSION;
// save settings
await this.saveSettings();
// open view
this.open_view();
}
this.add_to_gitignore();
}
addIcon(){
Obsidian.addIcon("smart-connections", `<path d="M50,20 L80,40 L80,60 L50,100" stroke="currentColor" stroke-width="4" fill="none"/>
<path d="M30,50 L55,70" stroke="currentColor" stroke-width="5" fill="none"/>
<circle cx="50" cy="20" r="9" fill="currentColor"/>
<circle cx="80" cy="40" r="9" fill="currentColor"/>
<circle cx="80" cy="70" r="9" fill="currentColor"/>
<circle cx="50" cy="100" r="9" fill="currentColor"/>
<circle cx="30" cy="50" r="9" fill="currentColor"/>`);
}
async open_view() {
this.app.workspace.detachLeavesOfType(SMART_CONNECTIONS_VIEW_TYPE);
await this.app.workspace.getRightLeaf(false).setViewState({
type: SMART_CONNECTIONS_VIEW_TYPE,
active: true,
});
this.app.workspace.revealLeaf(
this.app.workspace.getLeavesOfType(SMART_CONNECTIONS_VIEW_TYPE)[0]
);
}
// source: https://github.com/obsidianmd/obsidian-releases/blob/master/plugin-review.md#avoid-managing-references-to-custom-views
get_view() {
for (let leaf of this.app.workspace.getLeavesOfType(SMART_CONNECTIONS_VIEW_TYPE)) {
if (leaf.view instanceof SmartConnectionsView) {
return leaf.view;
}
}
}
// open chat view
async open_chat() {
this.app.workspace.detachLeavesOfType(SMART_CONNECTIONS_CHAT_VIEW_TYPE);
await this.app.workspace.getRightLeaf(false).setViewState({
type: SMART_CONNECTIONS_CHAT_VIEW_TYPE,
active: true,
});
this.app.workspace.revealLeaf(
this.app.workspace.getLeavesOfType(SMART_CONNECTIONS_CHAT_VIEW_TYPE)[0]
);
}
// get embeddings for all files
async get_all_embeddings() {
// get all files in vault
const files = await this.app.vault.getMarkdownFiles();
// get open files to skip if file is currently open
const open_files = this.app.workspace.getLeavesOfType("markdown").map((leaf) => leaf.view.file);
this.render_log.total_files = files.length;
this.clean_up_embeddings(files);
// batch embeddings
let batch_promises = [];
for (let i = 0; i < files.length; i++) {
// skip if path contains a #
if(files[i].path.indexOf("#") > -1) {
// console.log("skipping file '"+files[i].path+"' (path contains #)");
this.log_exclusion("path contains #");
continue;
}
const curr_key = crypto.createHash("md5").update(files[i].path).digest("hex");
// skip if file already has embedding and embedding.mtime is greater than or equal to file.mtime
if((this.embeddings[curr_key]) && (this.embeddings[curr_key].meta.mtime >= files[i].stat.mtime)) {
// log skipping file
//console.log("skipping file (mtime)");
continue;
}
// check if file is in failed_files
if(this.settings.failed_files.indexOf(files[i].path) > -1) {
// log skipping file
// console.log("skipping previously failed file, use button in settings to retry");
// use setTimeout to prevent multiple notices
if(this.retry_notice_timeout) {
clearTimeout(this.retry_notice_timeout);
this.retry_notice_timeout = null;
}
this.retry_notice_timeout = setTimeout(() => {
new Obsidian.Notice("Smart Connections: Skipping previously failed file, use button in settings to retry");
}, 3000);
continue;
}
// skip files where path contains any exclusions
let skip = false;
for(let j = 0; j < this.file_exclusions.length; j++) {
if(files[i].path.indexOf(this.file_exclusions[j]) > -1) {
skip = true;
this.log_exclusion(this.file_exclusions[j]);
// break out of loop
break;
}
}
if(skip) {
continue; // to next file
}
// check if file is open
if(open_files.indexOf(files[i]) > -1) {
// console.log("skipping file (open)");
continue;
}
try {
// push promise to batch_promises
batch_promises.push(this.get_file_embeddings(files[i], false));
} catch (error) {
console.log(error);
}
// if batch_promises length is 10
if(batch_promises.length > 3) {
// wait for all promises to resolve
await Promise.all(batch_promises);
// clear batch_promises
batch_promises = [];
}
// save embeddings JSON to file every 100 files to save progress on bulk embedding
if(i > 0 && i % 100 === 0) {
await this.save_embeddings_to_file();
}
}
// console.log(this.embeddings);
// wait for all promises to resolve
await Promise.all(batch_promises);
// write embeddings JSON to file
await this.save_embeddings_to_file();
// if render_log.failed_embeddings then update failed_embeddings.txt
if(this.render_log.failed_embeddings.length > 0) {
await this.save_failed_embeddings();
}
}
async save_embeddings_to_file(force=false) {
if(!this.has_new_embeddings){
return;
}
// console.log("new embeddings, saving to file");
if(!force) {
// prevent excessive writes to embeddings file by waiting 1 minute before writing
if(this.save_timeout) {
clearTimeout(this.save_timeout);
this.save_timeout = null;
}
this.save_timeout = setTimeout(() => {
// console.log("writing embeddings to file");
this.save_embeddings_to_file(true);
// clear timeout
if(this.save_timeout) {
clearTimeout(this.save_timeout);
this.save_timeout = null;
}
}, 60000);
// console.log("scheduled save");
return;
}
const embeddings = JSON.stringify(this.embeddings);
// check if embeddings file exists
const embeddings_file_exists = await this.app.vault.adapter.exists(".smart-connections/embeddings-2.json");
// if embeddings file exists then check if new embeddings file size is significantly smaller than existing embeddings file size
if(embeddings_file_exists) {
// esitmate file size of embeddings
const new_file_size = embeddings.length;
// get existing file size
const existing_file_size = await this.app.vault.adapter.stat(".smart-connections/embeddings-2.json").then((stat) => stat.size);
// console.log("new file size: "+new_file_size);
// console.log("existing file size: "+existing_file_size);
// if new file size is at least 50% of existing file size then write embeddings to file
if(new_file_size > (existing_file_size * 0.5)) {
// write embeddings to file
await this.app.vault.adapter.write(".smart-connections/embeddings-2.json", embeddings);
this.has_new_embeddings = false;
console.log("embeddings file size: "+new_file_size+" bytes");
}else{
// if new file size is significantly smaller than existing file size then throw error
// show warning message including file sizes
const warning_message = [
"Warning: New embeddings file size is significantly smaller than existing embeddings file size.",
"Aborting to prevent possible loss of embeddings data.",
"New file size: "+new_file_size+" bytes.",
"Existing file size: "+existing_file_size+" bytes.",
"Restarting Obsidian may fix this."
];
console.log(warning_message.join(" "));
// save embeddings to file named unsaved-embeddings.json
await this.app.vault.adapter.write(".smart-connections/unsaved-embeddings.json", embeddings);
new Obsidian.Notice("Smart Connections: Warning: New embeddings file size is significantly smaller than existing embeddings file size. Aborting to prevent possible loss of embeddings data. See Smart Connections view for more details.");
throw new Error("Error: New embeddings file size is significantly smaller than existing embeddings file size. Aborting to prevent possible loss of embeddings data.");
}
}else{
await this.init_embeddings_file();
await this.save_embeddings_to_file();
}
}
// save failed embeddings to file from render_log.failed_embeddings
async save_failed_embeddings () {
// write failed_embeddings to file one line per failed embedding
let failed_embeddings = [];
// if file already exists then read it
const failed_embeddings_file_exists = await this.app.vault.adapter.exists(".smart-connections/failed-embeddings.txt");
if(failed_embeddings_file_exists) {
failed_embeddings = await this.app.vault.adapter.read(".smart-connections/failed-embeddings.txt");
// split failed_embeddings into array
failed_embeddings = failed_embeddings.split("\r\n");
}
// merge failed_embeddings with render_log.failed_embeddings
failed_embeddings = failed_embeddings.concat(this.render_log.failed_embeddings);
// remove duplicates
failed_embeddings = [...new Set(failed_embeddings)];
// sort failed_embeddings array alphabetically
failed_embeddings.sort();
// convert failed_embeddings array to string
failed_embeddings = failed_embeddings.join("\r\n");
// write failed_embeddings to file
await this.app.vault.adapter.write(".smart-connections/failed-embeddings.txt", failed_embeddings);
// reload failed_embeddings to prevent retrying failed files until explicitly requested
await this.load_failed_files();
}
// test writing file to check if file system is read-only
async test_file_writing () {
// wrap in try catch to prevent error from crashing plugin
let log = "Begin test:";
try {
// check if test file already exists
const test_file_exists = await this.app.vault.adapter.exists(".smart-connections/embeddings-test.json");
// if test file exists then delete it
if(test_file_exists) {
await this.app.vault.adapter.remove(".smart-connections/embeddings-test.json");
}
// write test file
await this.app.vault.adapter.write(".smart-connections/embeddings-test.json", "test");
// update test file
if(this.embeddings){
await this.app.vault.adapter.write(".smart-connections/embeddings-test.json", JSON.stringify(this.embeddings));
}else{
log += "<br>No embeddings to test, writing test content to file."
await this.app.vault.adapter.write(".smart-connections/embeddings-test.json", "test2");
}
// delete test file
// await this.app.vault.adapter.remove(".smart-connections/embeddings-test.json");
// return "File writing test passed."
log += "<br>File writing test passed.";
}catch(error) {
// return error message
log += "<br>File writing test failed: "+error;
}
return log;
}
// load failed files from failed-embeddings.txt
async load_failed_files () {
// check if failed-embeddings.txt exists
const failed_embeddings_file_exists = await this.app.vault.adapter.exists(".smart-connections/failed-embeddings.txt");
if(!failed_embeddings_file_exists) {
this.settings.failed_files = [];
console.log("No failed files.");
return;
}
// read failed-embeddings.txt
const failed_embeddings = await this.app.vault.adapter.read(".smart-connections/failed-embeddings.txt");
// split failed_embeddings into array and remove empty lines
const failed_embeddings_array = failed_embeddings.split("\r\n");
// split at '#' and reduce into unique file paths
const failed_files = failed_embeddings_array.map(embedding => embedding.split("#")[0]).reduce((unique, item) => unique.includes(item) ? unique : [...unique, item], []);
// return failed_files
this.settings.failed_files = failed_files;
// console.log(failed_files);
}
// retry failed embeddings
async retry_failed_files () {
// remove failed files from failed_files
this.settings.failed_files = [];
// if failed-embeddings.txt exists then delete it
const failed_embeddings_file_exists = await this.app.vault.adapter.exists(".smart-connections/failed-embeddings.txt");
if(failed_embeddings_file_exists) {
await this.app.vault.adapter.remove(".smart-connections/failed-embeddings.txt");
}
// run get all embeddings
await this.get_all_embeddings();
}
// check if key from embeddings exists in files
clean_up_embeddings(files) {
for (let key in this.embeddings) {
// console.log("key: "+key);
const path = this.embeddings[key].meta.path;
// if no key starts with file path
if(!files.find(file => path.startsWith(file.path))) {
// delete key if it doesn't exist
delete this.embeddings[key];
this.render_log.deleted_embeddings++;
// console.log("deleting (deleted file): " + key);
continue;
}
// if key contains '#'
if(path.indexOf("#") > -1) {
// split at '#' and get first part
const file_key = this.embeddings[key].meta.file;
// if file_key and file.hashes exists and block hash not in file.hashes
if(!this.embeddings[file_key]){
// delete key
delete this.embeddings[key];
this.render_log.deleted_embeddings++;
// console.log("deleting (missing file embedding)");
continue;
}
if(!this.embeddings[file_key].meta){
// delete key
delete this.embeddings[key];
this.render_log.deleted_embeddings++;
// console.log("deleting (missing file meta)");
continue;
}
if(this.embeddings[file_key].meta.blocks && (this.embeddings[file_key].meta.blocks.indexOf(key) < 0)) {
// delete key
delete this.embeddings[key];
this.render_log.deleted_embeddings++;
// console.log("deleting (missing block in file)");
continue;
}
// DEPRECATED - currently included to prevent existing embeddings from being refreshed all at once
if(this.embeddings[file_key].meta.mtime &&
this.embeddings[key].meta.mtime &&
(this.embeddings[file_key].meta.mtime > this.embeddings[key].meta.mtime)
) {
// delete key
delete this.embeddings[key];
this.render_log.deleted_embeddings++;
// console.log("deleting (stale block - mtime): " + key);
}
}
}
}
async init_embeddings_file() {
// check if folder exists
if (!(await this.app.vault.adapter.exists(".smart-connections"))) {
// create folder
await this.app.vault.adapter.mkdir(".smart-connections");
console.log("created folder: .smart-connections");
// if .gitignore file exists then add .smart-connections to .gitignore
await this.add_to_gitignore();
}else{
console.log("folder already exists: .smart-connections");
}
// check if embeddings file exists
if (!(await this.app.vault.adapter.exists(".smart-connections/embeddings-2.json"))) {
// create embeddings file
await this.app.vault.adapter.write(".smart-connections/embeddings-2.json", "{}");
console.log("created embeddings file: .smart-connections/embeddings-2.json");
}else{
console.log("embeddings file already exists: .smart-connections/embeddings-2.json");
}
}
/**
* migrate embeddings.json to embeddings-2.json
* - embeddings-2.json is a new file format that uses a different method to store embeddings
* - move key to meta.source
* - replace key with md5(meta.source)
* - move values to vec
*/
// if embeddings.json exists then use it to create embeddings-2.json
async migrate_embeddings_to_v2() {
// get view and set to loading
// read embeddings.json
const embeddings = await this.app.vault.adapter.read(".smart-connections/embeddings.json");
// parse embeddings.json
const embeddings_json = JSON.parse(embeddings);
// create new embeddings-2.json
const embeddings_2_json = {};
// loop through embeddings.json
for (let key in embeddings_json) {
// create new key using crypto SHA1 hash
const new_key = crypto.createHash('md5').update(key).digest('hex');
// create new embeddings-2.json entry
embeddings_2_json[new_key] = {
"vec": embeddings_json[key].values,
"meta": {
"path": key,
"hash": embeddings_json[key].hash,
"mtime": embeddings_json[key].mtime,
"tokens": embeddings_json[key].tokens,
},
}
// if has hashes
if(embeddings_json[key].hashes) {
embeddings_2_json[new_key].meta.blocks = [];
// loop through hashes
for (let hash of embeddings_json[key].hashes) {
// iterate through embeddings_json
for(let key2 in embeddings_json) {
if (embeddings_json[key2].hash == hash) {
// create hash from key
const hash_key = crypto.createHash('md5').update(key2).digest('hex');
embeddings_2_json[new_key].meta.blocks.push(hash_key);
}
}
}
// sort blocks
embeddings_2_json[new_key].meta.blocks.sort();
}
// if key contains '#'
if(key.indexOf("#") > -1) {
// split at '#' and get first part
const file_key = crypto.createHash('md5').update(key.split("#")[0]).digest('hex');
embeddings_2_json[new_key].meta.file = file_key;
}
// re-write object create to exclude any undefined values
embeddings_2_json[new_key] = JSON.parse(JSON.stringify(embeddings_2_json[new_key]));
}
// write embeddings-2.json
await this.app.vault.adapter.write(".smart-connections/embeddings-2.json", JSON.stringify(embeddings_2_json));
}
// add .smart-connections to .gitignore to prevent issues with large, frequently updated embeddings file(s)
async add_to_gitignore() {
if(!(await this.app.vault.adapter.exists(".gitignore"))) {
return; // if .gitignore doesn't exist then don't add .smart-connections to .gitignore
}
let gitignore_file = await this.app.vault.adapter.read(".gitignore");
// if .smart-connections not in .gitignore
if (gitignore_file.indexOf(".smart-connections") < 0) {
// add .smart-connections to .gitignore
let add_to_gitignore = "\n\n# Ignore Smart Connections folder because embeddings file is large and updated frequently";
add_to_gitignore += "\n.smart-connections";
await this.app.vault.adapter.write(".gitignore", gitignore_file + add_to_gitignore);
console.log("added .smart-connections to .gitignore");
}
}
// force refresh embeddings file but first rename existing embeddings file to .smart-connections/embeddings-YYYY-MM-DD.json
async force_refresh_embeddings_file() {
// get current datetime as unix timestamp
let current_datetime = Math.floor(Date.now() / 1000);
// rename existing embeddings file to .smart-connections/embeddings-YYYY-MM-DD.json
await this.app.vault.adapter.rename(".smart-connections/embeddings-2.json", ".smart-connections/embeddings-"+current_datetime+".json");
// create new embeddings file
await this.app.vault.adapter.write(".smart-connections/embeddings-2.json", "{}");
new Obsidian.Notice("Smart Connections: embeddings file Force Refreshed, making new connections...");
// clear this.embeddings
this.embeddings = null;
this.embeddings = {};
// trigger making new connections
await this.get_all_embeddings();
this.output_render_log();
new Obsidian.Notice("Smart Connections: embeddings file Force Refreshed, new connections made.");
}
// get embeddings for embed_input
async get_file_embeddings(curr_file, save=true) {
// let batch_promises = [];
let req_batch = [];
let blocks = [];
// initiate curr_file_key from md5(curr_file.path)
const curr_file_key = crypto.createHash('md5').update(curr_file.path).digest('hex');
// intiate file_file_embed_input by removing .md and converting file path to breadcrumbs (" > ")
let file_embed_input = curr_file.path.replace(".md", "");
file_embed_input = file_embed_input.replace(/\//g, " > ");
// embed on file.name/title only if path_only path matcher specified in settings
let path_only = false;
for(let j = 0; j < this.path_only.length; j++) {
if(curr_file.path.indexOf(this.path_only[j]) > -1) {
path_only = true;
console.log("title only file with matcher: " + this.path_only[j]);
// break out of loop
break;
}
}
// return early if path_only
if(path_only) {
// await this.get_embeddings(curr_file_key, file_embed_input, {
// mtime: curr_file.stat.mtime,
// path: curr_file.path,
// });
req_batch.push([curr_file_key, file_embed_input, {
mtime: curr_file.stat.mtime,
path: curr_file.path,
}]);
await this.get_embeddings_batch(req_batch);
return;
}
/**
* BEGIN Block "section" embedding
*/
// get file contents
const note_contents = await this.app.vault.cachedRead(curr_file);
let processed_since_last_save = 0;
const note_sections = this.block_parser(note_contents, curr_file.path);
// console.log(note_sections);
// if note has more than one section (if only one then its same as full-content)
if(note_sections.length > 1) {
// for each section in file
//console.log("Sections: " + note_sections.length);
for (let j = 0; j < note_sections.length; j++) {
// get embed_input for block
const block_embed_input = note_sections[j].text;
// console.log(note_sections[j].path);
// get block key from block.path (contains both file.path and header path)
const block_key = crypto.createHash('md5').update(note_sections[j].path).digest('hex');
blocks.push(block_key);
let block_hash; // set hash of block_embed_input in correct scope
if (this.embeddings[block_key] && this.embeddings[block_key].meta) {
// skip if length of block_embed_input same as length of embeddings[block_key].meta.len
if (block_embed_input.length === this.embeddings[block_key].meta.len) {
// log skipping file
// console.log("skipping block (len)");
continue;
}
// add hash to blocks to prevent empty blocks triggering full-file embedding
// skip if embeddings key already exists and block mtime is greater than or equal to file mtime
if (this.embeddings[block_key].meta.mtime >= curr_file.stat.mtime) {
// log skipping file
// console.log("skipping block (mtime)");
continue;
}
// skip if hash is present in this.embeddings and hash of block_embed_input is equal to hash in this.embeddings
block_hash = this.get_embed_hash(block_embed_input);
if(this.embeddings[block_key].meta.hash === block_hash) {
// log skipping file
// console.log("skipping block (hash)");
continue;
}
}
// create req_batch for batching requests
req_batch.push([block_key, block_embed_input, {
// oldmtime: curr_file.stat.mtime,
// get current datetime as unix timestamp
mtime: Date.now(),
hash: block_hash,
file: curr_file_key,
path: note_sections[j].path,
len: block_embed_input.length,
}]);
if(req_batch.length > 9) {
// add batch to batch_promises
await this.get_embeddings_batch(req_batch);
processed_since_last_save += req_batch.length;
// log embedding
// console.log("embedding: " + curr_file.path);
if (processed_since_last_save >= 30) {
// write embeddings JSON to file
await this.save_embeddings_to_file();
// reset processed_since_last_save
processed_since_last_save = 0;
}
// reset req_batch
req_batch = [];
}
}
}
// if req_batch is not empty
if(req_batch.length > 0) {
// process remaining req_batch
await this.get_embeddings_batch(req_batch);
req_batch = [];
processed_since_last_save += req_batch.length;
}
/**
* BEGIN File "full note" embedding
*/
// if file length is less than ~8000 tokens use full file contents
// else if file length is greater than 8000 tokens build file_embed_input from file headings
file_embed_input += `:\n`;
/**
* TODO: improve/refactor the following "large file reduce to headings" logic
*/
if(note_contents.length < MAX_EMBED_STRING_LENGTH) {
file_embed_input += note_contents
}else{
const note_meta_cache = this.app.metadataCache.getFileCache(curr_file);
// for each heading in file
if(typeof note_meta_cache.headings === "undefined") {
// console.log("no headings found, using first chunk of file instead");
file_embed_input += note_contents.substring(0, MAX_EMBED_STRING_LENGTH);
// console.log("chuck len: " + file_embed_input.length);
}else{
let note_headings = "";
for (let j = 0; j < note_meta_cache.headings.length; j++) {
// get heading level
const heading_level = note_meta_cache.headings[j].level;
// get heading text
const heading_text = note_meta_cache.headings[j].heading;
// build markdown heading
let md_heading = "";
for (let k = 0; k < heading_level; k++) {
md_heading += "#";
}
// add heading to note_headings
note_headings += `${md_heading} ${heading_text}\n`;
}
//console.log(note_headings);
file_embed_input += note_headings
if(file_embed_input.length > MAX_EMBED_STRING_LENGTH) {
file_embed_input = file_embed_input.substring(0, MAX_EMBED_STRING_LENGTH);
}
}
}
// skip embedding full file if blocks is not empty and all hashes are present in this.embeddings
// better than hashing file_embed_input because more resilient to inconsequential changes (whitespace between headings)
const file_hash = this.get_embed_hash(file_embed_input);
const existing_hash = (this.embeddings[curr_file_key] && this.embeddings[curr_file_key].meta) ? this.embeddings[curr_file_key].meta.hash : null;
if(existing_hash && (file_hash === existing_hash)) {
// console.log("skipping file (hash): " + curr_file.path);
this.update_render_log(blocks, file_embed_input);
return;
};
// if not already skipping and blocks are present
const existing_blocks = (this.embeddings[curr_file_key] && this.embeddings[curr_file_key].meta) ? this.embeddings[curr_file_key].meta.blocks : null;
let existing_has_all_blocks = true;
if(existing_blocks && Array.isArray(existing_blocks) && (blocks.length > 0)) {
// if all blocks are in existing_blocks then skip (allows deletion of small blocks without triggering full file embedding)
for (let j = 0; j < blocks.length; j++) {
if(existing_blocks.indexOf(blocks[j]) === -1) {
existing_has_all_blocks = false;
break;
}
}
}
// if existing has all blocks then check file size for delta
if(existing_has_all_blocks){
// get current note file size
const curr_file_size = curr_file.stat.size;
// get file size from this.embeddings
let prev_file_size = 0;
if (this.embeddings[curr_file_key] && this.embeddings[curr_file_key].meta && this.embeddings[curr_file_key].meta.size) {
prev_file_size = this.embeddings[curr_file_key].meta.size;
// if curr file size is less than 10% different from prev file size
const file_delta_pct = Math.round((Math.abs(curr_file_size - prev_file_size) / curr_file_size) * 100);
if(file_delta_pct < 10) {
// skip embedding
// console.log("skipping file (size) " + curr_file.path);
this.render_log.skipped_low_delta[curr_file.name] = file_delta_pct + "%";
this.update_render_log(blocks, file_embed_input);
return;
}
}
}
let meta = {
mtime: curr_file.stat.mtime,
hash: file_hash,
path: curr_file.path,
size: curr_file.stat.size,
blocks: blocks,
};
// batch_promises.push(this.get_embeddings(curr_file_key, file_embed_input, meta));
req_batch.push([curr_file_key, file_embed_input, meta]);
// send batch request
await this.get_embeddings_batch(req_batch);
// log embedding
// console.log("embedding: " + curr_file.path);
if (save) {
// write embeddings JSON to file
await this.save_embeddings_to_file();
}
}
update_render_log(blocks, file_embed_input) {
if (blocks.length > 0) {
// multiply by 2 because implies we saved token spending on blocks(sections), too
this.render_log.tokens_saved_by_cache += file_embed_input.length / 2;
} else {
// calc tokens saved by cache: divide by 4 for token estimate
this.render_log.tokens_saved_by_cache += file_embed_input.length / 4;
}
}
// async get_embeddings(key, embed_input, meta={}) {
// const requestResults = await this.request_embedding_from_input(embed_input);
// // if requestResults is null then return
// if(!requestResults) {
// console.log("failed embedding: " + meta.path);
// // log failed file names to render_log
// this.render_log.failed_embeddings.push(meta.path);
// return;
// }
// // if requestResults is not null
// if(requestResults){
// // add embedding key to render_log
// if(this.settings.log_render){
// if(this.settings.log_render_files){
// this.render_log.files.push(meta);
// }
// this.render_log.new_embeddings++;
// // add token usage to render_log
// this.render_log.token_usage += requestResults.usage.total_tokens;
// }
// const vec = requestResults.data[0].embedding;
// if(vec) {
// this.embeddings[key] = {};
// this.embeddings[key].vec = vec;
// this.embeddings[key].meta = meta;
// this.embeddings[key].meta.tokens = requestResults.usage.total_tokens;
// }
// }
// }
async get_embeddings_batch(req_batch) {
// if req_batch is empty then return
if(req_batch.length === 0) return;
// create arrary of embed_inputs from req_batch[i][1]
const embed_inputs = req_batch.map((req) => req[1]);
// request embeddings from embed_inputs
const requestResults = await this.request_embedding_from_input(embed_inputs);
// if requestResults is null then return
if(!requestResults) {
console.log("failed embedding batch");
// log failed file names to render_log
this.render_log.failed_embeddings = [...this.render_log.failed_embeddings, ...req_batch.map((req) => req[2].path)];
return;
}
// if requestResults is not null
if(requestResults){
this.has_new_embeddings = true;
// add embedding key to render_log
if(this.settings.log_render){
if(this.settings.log_render_files){
this.render_log.files = [...this.render_log.files, ...req_batch.map((req) => req[2].path)];
}
this.render_log.new_embeddings += req_batch.length;
// add token usage to render_log
this.render_log.token_usage += requestResults.usage.total_tokens;
}
// console.log(requestResults.data.length);
// loop through requestResults.data
for(let i = 0; i < requestResults.data.length; i++) {
const vec = requestResults.data[i].embedding;
const index = requestResults.data[i].index;
if(vec) {
const key = req_batch[index][0];
const meta = req_batch[index][2];
this.embeddings[key] = {};
this.embeddings[key].vec = vec;
this.embeddings[key].meta = meta;
// this.embeddings[key].meta.tokens = requestResults.usage.total_tokens;
}
}
}
}
// md5 hash of embed_input using built in crypto module
get_embed_hash(embed_input) {
/**
* TODO remove more/all whitespace from embed_input
* - all newlines
* - all tabs
* - all spaces?
*/
// trim excess whitespace
embed_input = embed_input.trim();
return crypto.createHash('md5').update(embed_input).digest("hex");
}
async request_embedding_from_input(embed_input, retries = 0) {
// (FOR TESTING) test fail process by forcing fail
// return null;
// check if embed_input is a string
// if(typeof embed_input !== "string") {
// console.log("embed_input is not a string");
// return null;
// }
// check if embed_input is empty
if(embed_input.length === 0) {
console.log("embed_input is empty");
return null;
}