Skip to content

Commit

Permalink
Merge pull request #184 from OP-Engineering/oscar/custom-c-files
Browse files Browse the repository at this point in the history
Custom Tokenizers
  • Loading branch information
ospfranco authored Nov 18, 2024
2 parents 7877875 + 52cdf25 commit 3338141
Show file tree
Hide file tree
Showing 28 changed files with 501 additions and 67 deletions.
7 changes: 2 additions & 5 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -420,12 +420,9 @@ jobs:
env:
TURBO_CACHE_DIR: .turbo/android
steps:
- name: Checkout
uses: actions/checkout@v4
- uses: actions/checkout@v4

- name: Turn on libsql
run: |
node ./scripts/turnOnLibsql.js
- run: node ./scripts/turnOnLibsql.js

- name: Setup
uses: ./.github/actions/setup
Expand Down
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,6 @@ android/gradle/
!.yarn/plugins
!.yarn/releases
!.yarn/sdks
!.yarn/versions
!.yarn/versions

android/c_sources
5 changes: 0 additions & 5 deletions Gemfile

This file was deleted.

8 changes: 8 additions & 0 deletions android/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ set (BUILD_DIR ${CMAKE_SOURCE_DIR}/build)
../cpp
../cpp/sqlcipher
../cpp/libsql
# ../example/c_sources
)

add_definitions(
Expand Down Expand Up @@ -72,6 +73,13 @@ find_package(ReactAndroid REQUIRED CONFIG)
find_package(fbjni REQUIRED CONFIG)
find_library(LOG_LIB log)

# Add user defined files
# Only taken when USER_DEFINED_SOURCE_FILES holds a truthy value (a file list);
# the extra sources are compiled into the ${PACKAGE_NAME} target.
if (USER_DEFINED_SOURCE_FILES)
target_sources(${PACKAGE_NAME} PRIVATE ${USER_DEFINED_SOURCE_FILES})

# Defines TOKENIZERS_HEADER_PATH as a quoted string literal built from
# USER_DEFINED_TOKENIZERS_HEADER_PATH.
add_definitions("-DTOKENIZERS_HEADER_PATH=\"${USER_DEFINED_TOKENIZERS_HEADER_PATH}\"")
endif()

if (USE_SQLCIPHER)
if (ReactAndroid_VERSION_MINOR GREATER_EQUAL 76)
target_link_libraries(
Expand Down
41 changes: 37 additions & 4 deletions android/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,18 @@ def sqliteFlags = ""
def enableFTS5 = false
def useSqliteVec = false
def enableRtree = false
def tokenizers = []

def packageJsonFile = new File("$rootDir/../package.json")
def packageJson = new JsonSlurper().parseText(packageJsonFile.text)
def isInsideNodeModules = rootDir.absolutePath.contains("node_modules")
def packageJson

if ( isInsideNodeModules ) {
def packageJsonFile = new File("$rootDir/../../../package.json")
packageJson = new JsonSlurper().parseText(packageJsonFile.text)
} else {
def packageJsonFile = new File("$rootDir/../package.json")
packageJson = new JsonSlurper().parseText(packageJsonFile.text)
}

def opsqliteConfig = packageJson["op-sqlite"]
if(opsqliteConfig) {
Expand All @@ -49,6 +58,7 @@ if(opsqliteConfig) {
enableFTS5 = opsqliteConfig["fts5"]
useLibsql = opsqliteConfig["libsql"]
enableRtree = opsqliteConfig["rtree"]
tokenizers = opsqliteConfig["tokenizers"] ? opsqliteConfig["tokenizers"] : []
}

if(useSQLCipher) {
Expand Down Expand Up @@ -83,6 +93,11 @@ if(useSqliteVec) {
println "[OP-SQLITE] Sqlite Vec enabled! ↗️"
}


if (!tokenizers.isEmpty()) {
println "[OP-SQLITE] Tokenizers enabled! 🧾 Tokenizers: " + tokenizers
}

if (isNewArchitectureEnabled()) {
apply plugin: "com.facebook.react"
}
Expand Down Expand Up @@ -153,14 +168,32 @@ android {
cppFlags += "-DOP_SQLITE_USE_SQLITE_VEC=1"
}

cppFlags "-O2", "-fexceptions", "-frtti", "-std=c++1y", "-DONANDROID"
// These are zeroes because they will be passed as C flags, so they become falsy
def sourceFiles = 0
// def tokenizerInitStrings = 0
def tokenizersHeaderPath = 0
if (!tokenizers.isEmpty()) {
def sourceDir = isInsideNodeModules ? file("$rootDir/../../../c_sources") : file("$rootDir/../c_sources")
def destDir = file("$buildscript.sourceFile.parentFile/c_sources")
copy {
from sourceDir
into destDir
include "**/*.cpp", "**/*.h"
}
sourceFiles = fileTree(dir: destDir, include: ["**/*.cpp", "**/*.h"]).files.join(";")
tokenizersHeaderPath = "../c_sources/tokenizers.h"
}

cppFlags "-O2", "-fexceptions", "-DONANDROID"
abiFilters 'x86', 'x86_64', 'armeabi-v7a', 'arm64-v8a'
arguments "-DANDROID_STL=c++_shared",
"-DSQLITE_FLAGS='$sqliteFlags'",
"-DUSE_SQLCIPHER=${useSQLCipher ? 1 : 0}",
"-DUSE_CRSQLITE=${useCRSQLite ? 1 : 0}",
"-DUSE_LIBSQL=${useLibsql ? 1 : 0}",
"-DUSE_SQLITE_VEC=${useSqliteVec ? 1 : 0}"
"-DUSE_SQLITE_VEC=${useSqliteVec ? 1 : 0}",
"-DUSER_DEFINED_SOURCE_FILES=${sourceFiles}",
"-DUSER_DEFINED_TOKENIZERS_HEADER_PATH='${tokenizersHeaderPath}'"
}
}

Expand Down
88 changes: 88 additions & 0 deletions c_sources/tokenizers.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#include "tokenizers.h"
#include <cctype>
#include <memory>
#include <string>

namespace opsqlite {

// Fetches the FTS5 API pointer for `db` by running `SELECT fts5(?1)` with the
// address of the result slot bound as an "fts5_api_ptr" pointer value.
// Returns nullptr when the statement cannot be prepared or the slot is never
// filled in.
fts5_api *fts5_api_from_db(sqlite3 *db) {
  fts5_api *api = nullptr;
  sqlite3_stmt *stmt = nullptr;

  const int prepared =
      sqlite3_prepare_v2(db, "SELECT fts5(?1)", -1, &stmt, nullptr);
  if (prepared == SQLITE_OK) {
    sqlite3_bind_pointer(stmt, 1, static_cast<void *>(&api), "fts5_api_ptr",
                         nullptr);
    sqlite3_step(stmt);
  }

  // Finalize unconditionally, exactly as the statement handle was left by
  // the prepare call above.
  sqlite3_finalize(stmt);
  return api;
}

// Stateless tokenizer instance. It carries no data; it only exists so that
// the create/delete callbacks have an object to allocate and free.
// The compiler-generated constructor and destructor are used.
class WordTokenizer {};

// Define `xCreate`, which initializes the tokenizer
int wordTokenizerCreate(void *pUnused, const char **azArg, int nArg,
Fts5Tokenizer **ppOut) {
auto tokenizer = std::make_unique<WordTokenizer>();
*ppOut = reinterpret_cast<Fts5Tokenizer *>(
tokenizer.release()); // Cast to Fts5Tokenizer*
return SQLITE_OK;
}

// Define `xDelete`, which frees the tokenizer.
//
// Takes back the handle as the WordTokenizer it was cast from and destroys
// it.
void wordTokenizerDelete(Fts5Tokenizer *pTokenizer) {
  // Adopting the pointer deletes the instance when `reclaimed` goes out of
  // scope at the end of this function.
  const std::unique_ptr<WordTokenizer> reclaimed{
      reinterpret_cast<WordTokenizer *>(pTokenizer)};
}

// Define `xTokenize`, which performs the actual tokenization.
//
// Scans the `nText` bytes at `pText` and reports every maximal run of bytes
// for which std::isalnum is true. Each run is passed to `xToken` with `pCtx`,
// token flags 0, a pointer/length into `pText`, and its start and end byte
// offsets. `pTokenizer` and `flags` are not used.
//
// Returns SQLITE_OK once the whole input has been scanned, or the first
// non-SQLITE_OK value returned by `xToken`, in which case scanning stops.
int wordTokenizerTokenize(Fts5Tokenizer *pTokenizer, void *pCtx, int flags,
                          const char *pText, int nText,
                          int (*xToken)(void *, int, const char *, int, int,
                                        int)) {
  int tokenBegin = 0;

  // `pos == nText` is visited on purpose: it acts as a virtual separator
  // that flushes a token running up to the end of the input.
  for (int pos = 0; pos <= nText; ++pos) {
    const bool insideWord =
        pos != nText && std::isalnum(static_cast<unsigned char>(pText[pos]));
    if (insideWord) {
      continue;
    }

    if (pos > tokenBegin) { // A non-empty run ended just before `pos`
      const int status = xToken(pCtx, 0, pText + tokenBegin, pos - tokenBegin,
                                tokenBegin, pos);
      if (status != SQLITE_OK) {
        return status;
      }
    }
    tokenBegin = pos + 1;
  }

  return SQLITE_OK;
}

// Registers the word tokenizer callbacks with the FTS5 module of `db` under
// the name "wordtokenizer".
//
// Returns SQLITE_ERROR when the FTS5 API cannot be obtained for `db`,
// otherwise whatever xCreateTokenizer returns. `error` and `api` are not
// used.
int opsqlite_wordtokenizer_init(sqlite3 *db, char **error,
                                sqlite3_api_routines const *api) {
  fts5_api *const fts = fts5_api_from_db(db);
  if (fts == nullptr) {
    return SQLITE_ERROR;
  }

  fts5_tokenizer callbacks = {wordTokenizerCreate, wordTokenizerDelete,
                              wordTokenizerTokenize};
  return fts->xCreateTokenizer(fts, "wordtokenizer", nullptr, &callbacks,
                               nullptr);
}

// Registers a tokenizer with the FTS5 module of `db` under the name
// "portertokenizer".
//
// NOTE(review): the callbacks registered here are the same word tokenizer
// callbacks used for "wordtokenizer" — confirm whether that is intended.
//
// Returns SQLITE_ERROR when the FTS5 API cannot be obtained for `db`,
// otherwise whatever xCreateTokenizer returns. `error` and `api` are not
// used.
int opsqlite_porter_init(sqlite3 *db, char **error,
                         sqlite3_api_routines const *api) {
  fts5_api *const fts = fts5_api_from_db(db);
  if (fts == nullptr) {
    return SQLITE_ERROR;
  }

  fts5_tokenizer callbacks = {wordTokenizerCreate, wordTokenizerDelete,
                              wordTokenizerTokenize};
  return fts->xCreateTokenizer(fts, "portertokenizer", nullptr, &callbacks,
                               nullptr);
}

} // namespace opsqlite
15 changes: 15 additions & 0 deletions c_sources/tokenizers.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#ifndef TOKENIZERS_H
#define TOKENIZERS_H

// Statement list that expands to one init call per tokenizer declared below.
// The calls are unqualified and reference `db` and `errMsg`, so the macro must
// be expanded where those two names and the opsqlite functions are visible.
// The return values of the init calls are discarded.
#define TOKENIZER_LIST opsqlite_wordtokenizer_init(db,&errMsg,nullptr);opsqlite_porter_init(db,&errMsg,nullptr);

#include "sqlite3.h"

namespace opsqlite {

// Init entry points, one per tokenizer; each takes the database handle, an
// error-message out parameter and an API routines pointer, and returns an int
// status code.
int opsqlite_wordtokenizer_init(sqlite3 *db, char **error, sqlite3_api_routines const *api);
int opsqlite_porter_init(sqlite3 *db, char **error, sqlite3_api_routines const *api);

} // namespace opsqlite

#endif // TOKENIZERS_H
3 changes: 2 additions & 1 deletion cpp/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ void clearState() {
thread_pool->restartPool();
}

void install(jsi::Runtime &rt, const std::shared_ptr<react::CallInvoker>& invoker,
void install(jsi::Runtime &rt,
const std::shared_ptr<react::CallInvoker> &invoker,
const char *base_path, const char *crsqlite_path,
const char *sqlite_vec_path) {
invalidated = false;
Expand Down
10 changes: 9 additions & 1 deletion cpp/bridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,16 @@
#include "logs.h"
#include "utils.h"
#include <iostream>
#include <sstream>
#include <unordered_map>
#include <variant>

#ifdef TOKENIZERS_HEADER_PATH
#include TOKENIZERS_HEADER_PATH
#else
#define TOKENIZER_LIST
#endif

namespace opsqlite {

/// Maps to hold the different objects
Expand Down Expand Up @@ -109,9 +116,10 @@ BridgeResult opsqlite_open(std::string const &name,
if (errMsg != nullptr) {
return {.type = SQLiteError, .message = errMsg};
}

#endif

TOKENIZER_LIST

return {.type = SQLiteOk, .affectedRows = 0};
}

Expand Down
10 changes: 10 additions & 0 deletions cpp/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,4 +341,14 @@ int mkdir(std::string const &path) {
return 0;
}

// Splits `str` on ',' and returns the pieces in order.
//
// Pieces are returned verbatim (no trimming) and may be empty, e.g. "a,,b"
// yields {"a", "", "b"}. An empty input yields an empty vector, and a single
// trailing comma does not produce a trailing empty piece ("a,b," yields
// {"a", "b"}).
std::vector<std::string> parse_string_list(const std::string& str) {
  std::vector<std::string> pieces;
  std::string::size_type begin = 0;

  // Stop as soon as nothing is left after the last separator; this is what
  // keeps a trailing comma from adding an empty piece.
  while (begin < str.size()) {
    const std::string::size_type comma = str.find(',', begin);
    if (comma == std::string::npos) {
      pieces.push_back(str.substr(begin));
      break;
    }
    pieces.push_back(str.substr(begin, comma - begin));
    begin = comma + 1;
  }

  return pieces;
}

} // namespace opsqlite
10 changes: 10 additions & 0 deletions cpp/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,28 @@
#include <jsi/jsilib.h>
#include <map>
#include <vector>
#include <string>

namespace opsqlite {

namespace jsi = facebook::jsi;

jsi::Value toJSI(jsi::Runtime &rt, const JSVariant &value);

JSVariant toVariant(jsi::Runtime &rt, jsi::Value const &value);

std::vector<std::string> to_string_vec(jsi::Runtime &rt, jsi::Value const &xs);

std::vector<JSVariant> to_variant_vec(jsi::Runtime &rt, jsi::Value const &xs);

std::vector<int> to_int_vec(jsi::Runtime &rt, jsi::Value const &xs);

jsi::Value createResult(jsi::Runtime &rt, BridgeResult status,
std::vector<DumbHostObject> *results,
std::shared_ptr<std::vector<SmartHostObject>> metadata);

jsi::Value create_js_rows(jsi::Runtime &rt, const BridgeResult &status);

jsi::Value
create_raw_result(jsi::Runtime &rt, BridgeResult status,
const std::vector<std::vector<JSVariant>> *results);
Expand All @@ -38,6 +46,8 @@ bool folder_exists(const std::string &foldername);

bool file_exists(const std::string &path);

std::vector<std::string> parse_string_list(const std::string& str);

} // namespace opsqlite

#endif /* utils_h */
6 changes: 4 additions & 2 deletions example/Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,7 @@ source 'https://rubygems.org'

ruby '>= 2.7.6'

gem 'cocoapods', '>= 1.13', '!= 1.15.0', '!= 1.15.1'
gem 'activesupport', '>= 6.1.7.5', '!= 7.1.0'
gem 'cocoapods', '=1.15.2'
gem 'activesupport', '>= 6.1.7.5', '!= 7.1.0'
gem 'bigdecimal'
gem 'mutex_m'
Loading

0 comments on commit 3338141

Please sign in to comment.