diff --git a/main.js b/main.js index d30750c2..575f7a66 100644 --- a/main.js +++ b/main.js @@ -572,7 +572,6 @@ class SmartConnectionsPlugin extends Obsidian.Plugin { /** * BEGIN Block "section" embedding */ - let has_blocks = false; // get file contents const note_contents = await this.app.vault.cachedRead(curr_file); let processed_since_last_save = 0; @@ -580,7 +579,6 @@ class SmartConnectionsPlugin extends Obsidian.Plugin { // console.log(note_sections); // if note has more than one section (if only one then its same as full-content) if(note_sections.length > 1) { - has_blocks = true; // for each section in file //console.log("Sections: " + note_sections.length); for (let j = 0; j < note_sections.length; j++) { @@ -592,6 +590,12 @@ class SmartConnectionsPlugin extends Obsidian.Plugin { blocks.push(block_key); let block_hash; // set hash of block_embed_input in correct scope if (this.embeddings[block_key] && this.embeddings[block_key].meta) { + // skip if length of block_embed_input same as length of embeddings[block_key].meta.len + if (block_embed_input.length === this.embeddings[block_key].meta.len) { + // log skipping file + // console.log("skipping block (len)"); + continue; + } // add hash to blocks to prevent empty blocks triggering full-file embedding // skip if embeddings key already exists and block mtime is greater than or equal to file mtime if (this.embeddings[block_key].meta.mtime >= curr_file.stat.mtime) { @@ -607,36 +611,16 @@ class SmartConnectionsPlugin extends Obsidian.Plugin { continue; } } - // get embeddings for block - // add block_embeddings to embeddings - // batch_promises.push(this.get_embeddings(block_key, block_embed_input, { - // mtime: curr_file.stat.mtime, - // hash: block_hash, - // file: curr_file_key, - // path: note_sections[j].path, - // })); - // if(batch_promises.length > 4) { - // await Promise.all(batch_promises); - // processed_since_last_save += batch_promises.length; - // // log embedding - // // console.log("embedding: " + curr_file.path); - // if (processed_since_last_save >= 30) { - // // write embeddings JSON to file - // await this.save_embeddings_to_file(); - // // reset processed_since_last_save - // processed_since_last_save = 0; - // } - // // reset batch_promises - // batch_promises = []; - // } // create req_batch for batching requests req_batch.push([block_key, block_embed_input, { - mtime: curr_file.stat.mtime, + // oldmtime: curr_file.stat.mtime, + // get current datetime as unix timestamp + mtime: Date.now(), hash: block_hash, file: curr_file_key, path: note_sections[j].path, - len: note_sections[j].length, + len: block_embed_input.length, }]); if(req_batch.length > 9) { // add batch to batch_promises @@ -666,22 +650,6 @@ class SmartConnectionsPlugin extends Obsidian.Plugin { /** * BEGIN File "full note" embedding */ - let skip = false; - // get current note file size - const curr_file_size = curr_file.stat.size; - // get file size from this.embeddings - let prev_file_size = 0; - if (this.embeddings[curr_file_key] && this.embeddings[curr_file_key].meta && this.embeddings[curr_file_key].meta.size) { - prev_file_size = this.embeddings[curr_file_key].meta.size; - // if curr file size is less than 10% different from prev file size - const file_delta_pct = Math.round((Math.abs(curr_file_size - prev_file_size) / curr_file_size) * 100); - if(file_delta_pct < 10) { - // skip embedding - // console.log("skipping file (size) " + curr_file.path); - this.render_log.skipped_low_delta[curr_file.name] = file_delta_pct + "%"; - skip = true; - } - } // if file length is less than ~8000 tokens use full file contents // else if file length is greater than 8000 tokens build file_embed_input from file headings @@ -689,99 +657,87 @@ class SmartConnectionsPlugin extends Obsidian.Plugin { /** * TODO: improve/refactor the following "large file reduce to headings" logic */ - if(!skip){ - if(note_contents.length < MAX_EMBED_STRING_LENGTH) { - file_embed_input += note_contents - }else{ - const note_meta_cache = this.app.metadataCache.getFileCache(curr_file); - // for each heading in file - if(typeof note_meta_cache.headings === "undefined") { - // console.log("no headings found, using first chunk of file instead"); - file_embed_input += note_contents.substring(0, MAX_EMBED_STRING_LENGTH); - // console.log("chuck len: " + file_embed_input.length); - }else{ - let note_headings = ""; - for (let j = 0; j < note_meta_cache.headings.length; j++) { - // get heading level - const heading_level = note_meta_cache.headings[j].level; - // get heading text - const heading_text = note_meta_cache.headings[j].heading; - // build markdown heading - let md_heading = ""; - for (let k = 0; k < heading_level; k++) { - md_heading += "#"; - } - // add heading to note_headings - note_headings += `${md_heading} ${heading_text}\n`; - } - //console.log(note_headings); - file_embed_input += note_headings - if(file_embed_input.length > MAX_EMBED_STRING_LENGTH) { - file_embed_input = file_embed_input.substring(0, MAX_EMBED_STRING_LENGTH); + if(note_contents.length < MAX_EMBED_STRING_LENGTH) { + file_embed_input += note_contents + }else{ + const note_meta_cache = this.app.metadataCache.getFileCache(curr_file); + // for each heading in file + if(typeof note_meta_cache.headings === "undefined") { + // console.log("no headings found, using first chunk of file instead"); + file_embed_input += note_contents.substring(0, MAX_EMBED_STRING_LENGTH); + // console.log("chuck len: " + file_embed_input.length); + }else{ + let note_headings = ""; + for (let j = 0; j < note_meta_cache.headings.length; j++) { + // get heading level + const heading_level = note_meta_cache.headings[j].level; + // get heading text + const heading_text = note_meta_cache.headings[j].heading; + // build markdown heading + let md_heading = ""; + for (let k = 0; k < heading_level; k++) { + md_heading += "#"; } + // add heading to note_headings + note_headings += `${md_heading} ${heading_text}\n`; + } + //console.log(note_headings); + file_embed_input += note_headings + if(file_embed_input.length > MAX_EMBED_STRING_LENGTH) { + file_embed_input = file_embed_input.substring(0, MAX_EMBED_STRING_LENGTH); } } } - // skip embedding full file if blocks is not empty and all hashes are present in this.embeddings // better than hashing file_embed_input because more resilient to inconsequential changes (whitespace between headings) - let file_hash = this.get_embed_hash(file_embed_input); + const file_hash = this.get_embed_hash(file_embed_input); const existing_hash = (this.embeddings[curr_file_key] && this.embeddings[curr_file_key].meta) ? this.embeddings[curr_file_key].meta.hash : null; - if(!skip && existing_hash) { - if(file_hash === existing_hash) { - skip = true; - } + if(existing_hash && (file_hash === existing_hash)) { + // console.log("skipping file (hash): " + curr_file.path); + this.update_render_log(blocks, file_embed_input); + return; }; + // if not already skipping and blocks are present const existing_blocks = (this.embeddings[curr_file_key] && this.embeddings[curr_file_key].meta) ? this.embeddings[curr_file_key].meta.blocks : null; - if(!skip && existing_blocks && has_blocks && Array.isArray(existing_blocks) && (blocks.length > 0)) { - // if blocks is equal to existing_blocks - if(blocks.length === existing_blocks.length) { - skip = true; - for(let j = 0; j < blocks.length; j++) { - // triggers re-embedding if blocks were re-ordered - if(blocks[j] !== existing_blocks[j]) { - skip = false; - break; - } + let existing_has_all_blocks = true; + if(existing_blocks && Array.isArray(existing_blocks) && (blocks.length > 0)) { + // if all blocks are in existing_blocks then skip (allows deletion of small blocks without triggering full file embedding) + for (let j = 0; j < blocks.length; j++) { + if(existing_blocks.indexOf(blocks[j]) === -1) { + existing_has_all_blocks = false; + break; } } } - // skip if skip is true - if(!skip) { - let meta = { - mtime: curr_file.stat.mtime, - hash: file_hash, - path: curr_file.path, - size: curr_file.stat.size, - }; - if(has_blocks && (blocks.length > 0)) { - meta.blocks = blocks; - } - // batch_promises.push(this.get_embeddings(curr_file_key, file_embed_input, meta)); - req_batch.push([curr_file_key, file_embed_input, meta]); - }else{ - if(has_blocks && (blocks.length > 0)) { - // multiply by 2 because implies we saved token spending on blocks(sections), too - this.render_log.tokens_saved_by_cache += file_embed_input.length/2; - }else{ - // calc tokens saved by cache: divide by 4 for token estimate - this.render_log.tokens_saved_by_cache += file_embed_input.length/4; + // if existing has all blocks then check file size for delta + if(existing_has_all_blocks){ + // get current note file size + const curr_file_size = curr_file.stat.size; + // get file size from this.embeddings + let prev_file_size = 0; + if (this.embeddings[curr_file_key] && this.embeddings[curr_file_key].meta && this.embeddings[curr_file_key].meta.size) { + prev_file_size = this.embeddings[curr_file_key].meta.size; + // if curr file size is less than 10% different from prev file size + const file_delta_pct = Math.round((Math.abs(curr_file_size - prev_file_size) / curr_file_size) * 100); + if(file_delta_pct < 10) { + // skip embedding + // console.log("skipping file (size) " + curr_file.path); + this.render_log.skipped_low_delta[curr_file.name] = file_delta_pct + "%"; + this.update_render_log(blocks, file_embed_input); + return; + } } - // log skipping file - // console.log("skipping cached file"); - } - // if batch_promises is empty then return - // if(batch_promises.length === 0) { - // return; - // } - - // wait for all promises to resolve - // await Promise.all(batch_promises); - - if(req_batch.length === 0) { - return; } + let meta = { + mtime: curr_file.stat.mtime, + hash: file_hash, + path: curr_file.path, + size: curr_file.stat.size, + blocks: blocks, + }; + // batch_promises.push(this.get_embeddings(curr_file_key, file_embed_input, meta)); + req_batch.push([curr_file_key, file_embed_input, meta]); // send batch request await this.get_embeddings_batch(req_batch); @@ -791,6 +747,16 @@ class SmartConnectionsPlugin extends Obsidian.Plugin { // write embeddings JSON to file await this.save_embeddings_to_file(); } + + } + update_render_log(blocks, file_embed_input) { + if (blocks.length > 0) { + // multiply by 2 because implies we saved token spending on blocks(sections), too + this.render_log.tokens_saved_by_cache += file_embed_input.length / 2; + } else { + // calc tokens saved by cache: divide by 4 for token estimate + this.render_log.tokens_saved_by_cache += file_embed_input.length / 4; + } } async get_embeddings(key, embed_input, meta={}) { diff --git a/manifest.json b/manifest.json index 2cb940ff..87bfbf5d 100644 --- a/manifest.json +++ b/manifest.json @@ -3,7 +3,7 @@ "name": "Smart Connections", "author": "Brian Petro", "description": "Find links to similar notes using artificial intelligence from OpenAI.", - "version": "1.1.15", + "version": "1.1.16", "minAppVersion": "1.1.0", "authorUrl": "https://wfhbrian.com", "isDesktopOnly": true