Skip to content

Commit

Permalink
still trying to get search to work
Browse files Browse the repository at this point in the history
  • Loading branch information
mhughes2k committed Mar 11, 2024
1 parent 36e7bf5 commit 596fd89
Show file tree
Hide file tree
Showing 6 changed files with 197 additions and 9 deletions.
23 changes: 23 additions & 0 deletions search/engine/solrrag/classes/document.php
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,27 @@ public function export_file_for_engine($file) {
public function fetch_document_contents() {

}
/**
 * Populates this document from a record returned by the search engine.
 *
 * Walks every known field definition (required, optional and engine fields)
 * and copies the matching value from $docdata into the document. Date fields
 * are converted through import_time_from_engine(). Embedding vector fields
 * (any field named "solr_vector_<dimension>") are skipped, because they are
 * arrays and would otherwise trip the multivalued-field check below.
 *
 * @param array $docdata Field name => value pairs as returned by the engine.
 * @return void
 * @throws \core_search\engine_exception If a non-vector field holds an array value.
 */
public function set_data_from_engine($docdata) {
    $fields = static::$requiredfields + static::$optionalfields + static::$enginefields;
    foreach ($fields as $fieldname => $field) {
        // Optional params might not be there.
        if (!isset($docdata[$fieldname])) {
            continue;
        }

        if ($field['type'] === 'tdate') {
            // Time fields may need a preprocessing.
            $this->set($fieldname, static::import_time_from_engine($docdata[$fieldname]));
            continue;
        }

        // The engine names vector fields "solr_vector_" . count($vector), so match on the
        // prefix rather than on a fixed list of dimensions.
        if (strpos($fieldname, 'solr_vector_') === 0) {
            debugging("Skipping $fieldname", DEBUG_DEVELOPER);
            continue;
        }

        // No way we can make this work if there is any multivalue field.
        if (is_array($docdata[$fieldname])) {
            throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $fieldname);
        }

        $this->set($fieldname, $docdata[$fieldname]);
    }
}
}
48 changes: 46 additions & 2 deletions search/engine/solrrag/classes/engine.php
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,17 @@ public function is_server_ready()
*/
public function add_document($document, $fileindexing = false) {
$docdata = $document->export_for_engine();
debugging("Adding document");
if ($this->aiprovider->use_for_embeddings() && $this->aiclient) {
debugging('Generating vector using provider');
$vector = $this->aiclient->embed_query($document['content']);
$vlength = count($vector);
$vectorfield = "solr_vector_" . $vlength;
$docdata[$vectorfield] = $vector;
var_dump($docdata);
} else {
debugging("Err didn't do any vector stuff!");
}

if (!$this->add_solr_document($docdata)) {
return false;
Expand All @@ -85,6 +96,37 @@ public function add_document($document, $fileindexing = false) {
return true;
}

/**
 * Adds a batch of documents to the index, attaching an embedding vector to each
 * one when the AI provider is enabled for embeddings and a client is available.
 *
 * The vector is stored under a field named after its length
 * ("solr_vector_" . count($vector)).
 *
 * @param array $documents Documents exposing export_for_engine().
 * @param bool $fileindexing When true, attached files are processed per document afterwards.
 * @return array The value returned by add_solr_documents() for the batch.
 */
public function add_document_batch(array $documents, bool $fileindexing = false): array {
    $docdatabatch = [];
    foreach ($documents as $document) {
        $doc = $document->export_for_engine();

        if ($this->aiprovider->use_for_embeddings() && $this->aiclient) {
            debugging('Generating vector using provider', DEBUG_DEVELOPER);
            $vector = $this->aiclient->embed_query($doc['content']);
            // The target field depends on the dimension of the returned embedding.
            $vectorfield = "solr_vector_" . count($vector);
            $doc[$vectorfield] = $vector;
        } else {
            debugging("Err didn't do any vector stuff!", DEBUG_DEVELOPER);
        }

        $docdatabatch[] = $doc;
    }

    $resultcounts = $this->add_solr_documents($docdatabatch);

    // Files are processed one document at a time (if there are files it's slow anyway).
    if ($fileindexing) {
        foreach ($documents as $document) {
            // This will take care of updating all attached files in the index.
            $this->process_document_files($document);
        }
    }

    return $resultcounts;
}

/**
* Adds a file to the search engine.
*
Expand Down Expand Up @@ -291,7 +333,7 @@ protected function create_solr_document(array $doc): \SolrInputDocument {
* @throws \core_search\engine_exception
*/
public function execute_query($filters, $accessinfo, $limit = 0) {
var_dump($filters->similarity);

if (isset($filters->similarity) &&
$filters->similarity
) {
Expand All @@ -300,11 +342,13 @@ public function execute_query($filters, $accessinfo, $limit = 0) {
$this->execute_solr_knn_query($filters, $accessinfo, $limit);
} else {
debugging("Running regular search", DEBUG_DEVELOPER);
print_r($filters);
print_r($accessinfo);
return parent::execute_query($filters, $accessinfo, $limit);
}
}

protected function execute_solr_knn_query($filters, $accessinfo, $limit) {
public function execute_solr_knn_query($filters, $accessinfo, $limit) {
$vector = $filters->vector;
$topK = 3; // Nearest neighbours to retrieve.
$field = "solr_vector_" . count($vector);
Expand Down
51 changes: 50 additions & 1 deletion search/engine/solrrag/classes/schema.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

namespace search_solrrag;

use search_solr\document;
use \search_solrrag\engine;

class schema extends \search_solr\schema
Expand Down Expand Up @@ -94,6 +95,54 @@ protected function validate_fields(&$fields, $requireexisting = false) {
}
}
}
/**
 * Creates the schema fields for this engine on the server.
 *
 * Field definitions come from \search_solrrag\document; the schema version that
 * gets recorded is the SCHEMA_VERSION constant of the imported
 * \search_solr\document class.
 *
 * @param bool $checkexisting Passed through to add_fields().
 * @return mixed The value returned by add_fields().
 */
public function setup($checkexisting = true) {
    $definitions = \search_solrrag\document::get_default_fields_definition();

    // The "id" field already exists on the server, so it must not be added again.
    unset($definitions['id']);

    // Runs before any field is added.
    $this->check_index();

    $outcome = $this->add_fields($definitions, $checkexisting);

    // Record that the engine is now on the latest schema version.
    $this->engine->record_applied_schema_version(document::SCHEMA_VERSION);

    return $outcome;
}
/**
 * Checks the raw server response to a schema request and throws if it reports a failure.
 *
 * A response is rejected when it is empty, when it does not decode as JSON to a
 * truthy value, or when the decoded payload carries an "error" or "errors" entry.
 *
 * @param string $result Raw response body from the server.
 * @return void
 * @throws \moodle_exception With the 'errorcreatingschema' string on any failure.
 */
protected function validate_add_field_result($result) {

    if (!$result) {
        throw new \moodle_exception('errorcreatingschema', 'search_solrrag', '', get_string('nodatafromserver', 'search_solrrag'));
    }

    $results = json_decode($result);
    if (!$results) {
        // Not usable JSON: report the raw payload itself as the error detail.
        $errormsg = is_scalar($result) ? $result : json_encode($result);
        throw new \moodle_exception('errorcreatingschema', 'search_solrrag', '', $errormsg);
    }

    // It comes as error when fetching fields data.
    if (!empty($results->error)) {
        throw new \moodle_exception('errorcreatingschema', 'search_solrrag', '', $results->error);
    }

    // It comes as errors when adding fields.
    if (!empty($results->errors)) {
        // Collect the messages of every error entry and join them all with the same
        // separator, so that messages from different entries do not run together.
        $messages = [];
        foreach ($results->errors as $error) {
            $messages[] = implode(', ', $error->errorMessages);
        }
        throw new \moodle_exception('errorcreatingschema', 'search_solrrag', '', implode(', ', $messages));
    }
}
// public function can_setup_server() {
//print_r($this->engine);
// $status = $this->engine->is_server_configured();
Expand All @@ -111,4 +160,4 @@ protected function validate_fields(&$fields, $requireexisting = false) {
//
// return true;
// }
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
<?php
define("CLI_SCRIPT", true);
require_once("../../../../config.php");

require_once($CFG->libdir ."/clilib.php");
$admin = $DB->get_record('user', ['id' => 2]);
\core\session\manager::set_user($admin);
$search = $search = \core_search\manager::instance(true, true);

$engine = $search->get_engine();
Expand All @@ -14,24 +16,38 @@
$doccontent = file_get_contents($CFG->dirroot . "/search/engine/solrrag/tests/testdoc.txt");
if (file_exists($CFG->dirroot . "/search/engine/solrrag/tests/testdoc_vector.txt")) {
$vector = file_get_contents($CFG->dirroot . "/search/engine/solrrag/tests/testdoc_vector.txt");
$vector = json_decode($vector, true);
} else {
$client = new \core\ai\AIClient($provider);
$vector = $client->embed_query($doccontent);
file_put_contents($CFG->dirroot . "/search/engine/solrrag/tests/testdoc_vector.txt", $vector);
file_put_contents(
$CFG->dirroot . "/search/engine/solrrag/tests/testdoc_vector.txt",
json_encode($vector)
);
}
$doc = [
'id' => 'testdoc',
'solr_vector_1356' => $vector,
'title' => "this is a test document"
];
cli_heading("Adding document to solr");

$document = new \search_solrrag\document("1", "mod_xaichat", "files");
$document = new \search_solrrag\document("1", "mod_page", "activity");
$document->set('title', 'test document');
$document->set('solr_vector_1536', $vector);
$document->set('content',$doccontent);
$document->set('contextid', context_system::instance()->id);
$document->set('contextid', \core\context\system::instance()->id);
$document->set('courseid', SITEID);
$document->set('owneruserid', $USER->id);
$document->set('modified', time());
$engine->add_document($document);
var_dump($document);

$result = $engine->add_document($document);
var_dump($result);
if ($result == false) {
cli_error("Failed to add document");
} else {
cli_writeln("Document added to solr");
}

cli_writeln("End of script");
56 changes: 56 additions & 0 deletions search/engine/solrrag/cli/search.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
<?php
/**
 * CLI helper for search/engine/solrrag: runs a regular search followed by a
 * similarity (vector) search through the search manager and dumps both results.
 *
 * The query vector is the embedding of tests/testdoc.txt. It is cached as JSON in
 * tests/testdoc_vector.txt and regenerated through the AI client when the cache is
 * missing or does not decode to an array.
 */
define("CLI_SCRIPT", true);
// Resolve config.php from this file's location so the script works from any working directory.
require_once(__DIR__ . "/../../../../config.php");
require_once($CFG->libdir ."/clilib.php");

$search = \core_search\manager::instance(true, true);

$engine = $search->get_engine();

// Per the original note, this is expected to be a \core\ai\AIProvider.
$provider = core\ai\api::get_provider(1);

$testdir = $CFG->dirroot . "/search/engine/solrrag/tests";
$vectorfile = $testdir . "/testdoc_vector.txt";

$doccontent = file_get_contents($testdir . "/testdoc.txt");
if ($doccontent === false) {
    cli_error("Unable to read " . $testdir . "/testdoc.txt");
}

// Try the cached vector first; anything that is not a JSON array is treated as a cache miss.
$vector = null;
if (file_exists($vectorfile)) {
    $vector = json_decode(file_get_contents($vectorfile), true);
}
if (!is_array($vector)) {
    $client = new \core\ai\AIClient($provider);
    $vector = $client->embed_query($doccontent);
    file_put_contents($vectorfile, json_encode($vector));
}

// Searches are executed as the user with id 2.
// NOTE(review): presumably the primary admin account - confirm for the target site.
$admin = $DB->get_record('user', ['id' => 2]);
if (!$admin) {
    cli_error("User with id 2 not found");
}
\core\session\manager::set_user($admin);

$formdata = (object) [
    'q' => 'directory',
    'areaids' => [],
    'title' => '',
    'courseids' => [],
    'timestart' => 0,
    'timeend' => 0,
    'context' => \core\context\system::instance(),
];

cli_heading("Searching for document");
$result = $search->search($formdata, 0);

var_dump($result);


cli_heading("Similarity search");
// Same form data, now flagged for similarity search and carrying the query vector.
$formdata->similarity = true;
$formdata->vector = $vector;
$result = $search->search($formdata, 0);
var_dump($result);


cli_writeln("End of script");
Loading

0 comments on commit 596fd89

Please sign in to comment.