Skip to content

Commit

Permalink
solrrag/xaichat reference implementations of a moodle plugin
Browse files Browse the repository at this point in the history
  • Loading branch information
mhughes2k committed Mar 7, 2024
1 parent 74c7a74 commit fddf4e1
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 30 deletions.
41 changes: 41 additions & 0 deletions search/engine/solrrag/classes/ai/aiclient.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
* Base client for AI providers that uses simple http request.
*/
class AIClient extends \curl {
/**
* @var AIProvider
*/
private $provider;
public function __construct(
\core\ai\AIProvider $provider
Expand All @@ -24,6 +27,44 @@ public function get_chat_completions_url(): string {
return $this->provider->get('baseurl') . $this->provider->get('completions');
}

/**
 * Sends a chat completion request to the configured provider and returns the reply messages.
 *
 * Builds a JSON body from the provider's 'completionmodel' setting and the supplied messages,
 * POSTs it to get_chat_completions_url() and decodes the JSON response. When the response
 * carries a "usage" section, the token counts are passed on to the provider's usage counters.
 *
 * @param mixed $messages Conversation messages; sent unchanged as the "messages" field of the request.
 * @return array The "message" value of each returned choice, or an empty array when the
 *               response contains no "choices" (for example an error payload or invalid JSON).
 * @throws \coding_exception
 */
public function chat($messages) {
    $params = json_encode([
        "model" => $this->provider->get('completionmodel'),
        "messages" => $messages,
    ]);
    $rawresult = $this->post($this->get_chat_completions_url(), $params);
    $jsonresult = json_decode($rawresult);
    if (!isset($jsonresult->choices)) {
        // Do not exit() here: that would terminate the whole request/script and made the
        // return below unreachable. An empty result lets the caller decide how to react.
        return [];
    }
    $result = $this->convert_chat_completion($jsonresult->choices);
    if (isset($jsonresult->usage)) {
        // Forward the token accounting reported by the API to the provider.
        $this->provider->increment_prompt_usage($jsonresult->usage->prompt_tokens);
        $this->provider->increment_completion_tokens($jsonresult->usage->completion_tokens);
        $this->provider->increment_total_tokens($jsonresult->usage->total_tokens);
    }
    return $result;
}

/**
 * Collects the "message" value of every choice in an OpenAI-style chat completion response.
 *
 * @param mixed $choices The "choices" collection taken from the decoded response.
 * @return array The message of each choice, in the order the choices were iterated.
 */
protected function convert_chat_completion($choices) {
    $messages = [];
    foreach ($choices as $entry) {
        $messages[] = $entry->message;
    }
    return $messages;
}
/**
* @param $document
* @return array
Expand Down
17 changes: 15 additions & 2 deletions search/engine/solrrag/classes/ai/aiprovider.php
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,18 @@ public function increment_prompt_usage($change) {
$new = $current + $change;
set_config($key, $new, 'ai');
}
/**
 * Records completion-token usage for this provider.
 *
 * Currently a no-op: the method returns immediately, so the accounting code that follows
 * the early return is never executed.
 *
 * @param mixed $change Number of completion tokens to add (unused while the method is disabled).
 */
public function increment_completion_tokens($change) {
    return;
    // Unreachable while the early return above is in place.
    // NOTE(review): the config name embeds the provider's 'apikey' value - confirm that is
    // intended before re-enabling this code.
    $configname = implode("_", ['completiontokens', $this->get('id'), $this->get('apikey')]);
    $previous = get_config('ai', $configname);
    set_config($configname, $previous + $change, 'ai');
}
public function increment_total_tokens($change) {
return;
$key = [
Expand All @@ -95,6 +107,7 @@ public function increment_total_tokens($change) {
* @return array
*/
public static function get_records($filters = array(), $sort = '', $order = 'ASC', $skip = 0, $limit = 0) {
global $_ENV;
$records = [];
$fake = new static(0, (object) [
'id' => 1,
Expand All @@ -104,9 +117,9 @@ public static function get_records($filters = array(), $sort = '', $order = 'ASC
'baseurl' => 'https://api.openai.com/v1/',
'embeddings' => 'embeddings',
'embeddingmodel' => 'text-embedding-3-small',
'completions' => 'completions',
'completions' => 'chat/completions',
'completionmodel' => 'gpt-4-turbo-preview',
'apikey'=> ''
'apikey'=> $_ENV['OPENAIKEY']
]);
array_push($records, $fake);
return $records;
Expand Down
7 changes: 6 additions & 1 deletion search/engine/solrrag/classes/ai/api.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@

class api {

public static function get_all_providers() {
/**
 * Lists the AI providers available for a context.
 *
 * The $context argument is accepted but not consulted here: every record returned by
 * AIProvider::get_records() is included.
 *
 * @param mixed $context Optional context (currently unused).
 * @return array The providers, re-indexed from zero.
 */
public static function get_all_providers($context = null) {
    $providers = AIProvider::get_records();
    return array_values($providers);
}
public static function get_provider(int $id): AIProvider {
Expand Down
79 changes: 52 additions & 27 deletions search/engine/solrrag/classes/engine.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,28 @@ public function is_server_ready()
return true;
}

/**
 * Indexes a document in Solr and, when requested, its attached files.
 *
 * The document is exported with export_for_engine() and handed to add_solr_document().
 * File processing only happens after the document itself was added successfully.
 *
 * @param mixed $document The search document to index.
 * @param bool $fileindexing Whether to also process the files attached to the document.
 * @return bool False when add_solr_document() reports failure, true otherwise.
 * @throws \coding_exception
 */
public function add_document($document, $fileindexing = false) {
    $exported = $document->export_for_engine();
    $added = $this->add_solr_document($exported);

    if ($added) {
        if ($fileindexing) {
            // Bring every file attached to the document up to date in the index.
            $this->process_document_files($document);
        }
        return true;
    }

    return false;
}

/**
* Adds a file to the search engine.
*
Expand Down Expand Up @@ -120,8 +142,12 @@ protected function add_stored_file($document, $storedfile)

// This sets the true filename for Tika.
$url->param('resource.name', $storedfile->get_filename());
$url->param('extractOnly', "true");
// $url->param("xpath", "/xhtml:html/xhtml:body/xhtml:div//node()");
// If we're not doing embeddings, then we can just use the "original" implementation which will
// extract and index the file without passing the content back.
if (!$this->aiprovider->use_for_embeddings()) {
$url->param('extractOnly', "true");
}

// A giant block of code that is really just error checking around the curl request.
try {
$requesturl = $url->out(false);
Expand Down Expand Up @@ -168,41 +194,40 @@ protected function add_stored_file($document, $storedfile)
debugging($message, DEBUG_DEVELOPER);
} else {
// The document was successfully indexed.
debugging("Got SOLR update/extract response");
preg_match('/<str>(?<Content>.*)<\/str>/imsU', $result, $streamcontent);

if ($streamcontent[1]!== 0) {
$xmlcontent = html_entity_decode($streamcontent[1]);
$xml = simplexml_load_string($xmlcontent);
$filedoc['content'] = (string)$xml->body->asXML();
$metadata = $xml->head->meta;
foreach($metadata as $meta) {
$name = (string)$meta['name'];
$content = (string)$meta['content'];
if ($content != null) {
$filedoc[$name] = $content;
} else {
$filedoc[$name] = "";

if ($this->aiprovider->use_for_embeddings() && $this->aiclient) {
preg_match('/<str>(?<Content>.*)<\/str>/imsU', $result, $streamcontent);
debugging("Got SOLR update/extract response");
if ($streamcontent[1]!== 0) {
$xmlcontent = html_entity_decode($streamcontent[1]);
$xml = simplexml_load_string($xmlcontent);
$filedoc['content'] = (string)$xml->body->asXML();
$metadata = $xml->head->meta;
foreach($metadata as $meta) {
$name = (string)$meta['name'];
$content = (string)$meta['content'];
if ($content != null) {
$filedoc[$name] = $content;
} else {
$filedoc[$name] = "";

}
}
}
}
/**
* Since solr has given us back the content, we can now send it off to the AI provider.
*/
if ($this->aiprovider->use_for_embeddings() && $this->aiclient) {
/**
* Since solr has given us back the content, we can now send it off to the AI provider.
*/

// garnish $filedoc with the embedding vector. It would be nice if this could be done
// via the export_file_for_engine() call above, that has no awareness of the engine.
// We expect $filedoc['content'] to be set.
$vector = $this->aiclient->embed_query($filedoc['content']);
$vlength = count($vector);
$vectorfield = "solr_vector_" . $vlength;
// TODO Check if a field of this length actually exists or not.
$filedoc[$vectorfield] = $vector;
debugging("Using vector field $vectorfield");
} else {
// potentially warn that selected provider can't be used for
// generating embeddings for RAG.
// As before if embeddings is not in use, then we can bail
// as the document is already indexed.
return;
}
$this->add_solr_document($filedoc);
return;
Expand Down

0 comments on commit fddf4e1

Please sign in to comment.