Skip to content

Commit

Permalink
Removed mapper-attachments plugin. Now use the ingest-attachment plug…
Browse files Browse the repository at this point in the history
…in (#1375)

[Mapper Attachment plugin has been removed](elastic/elasticsearch#20416). Use the Ingest-attachment plugin and attachment processors with a pipeline to ingest new documents.

The flow for ingesting a file into Elasticsearch has changed a bit:

- You should create a Pipeline with an *Attachment Processor*
- Add a file to the document
- Add the document to the index using a query string parameter of this format: **pipeline=name_of_the_pipeline**
  • Loading branch information
p365labs authored and ruflin committed Sep 14, 2017
1 parent 6a696f0 commit ca47cd2
Show file tree
Hide file tree
Showing 3 changed files with 195 additions and 166 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ All notable changes to this project will be documented in this file based on the
- The disable_coord parameter of the bool and common_terms queries has been removed. If provided, it will be ignored and issue a deprecation warning. [#1369](https://github.com/ruflin/Elastica/pull/1369)
- [Unfiltered nested source](https://github.com/elastic/elasticsearch/pull/26102) should keep its full path [#1366](https://github.com/ruflin/Elastica/pull/1366)
- [Analyze Explain](https://www.elastic.co/guide/en/elasticsearch/reference/6.0/_explain_analyze.html) no longer supports [request parameters](https://www.elastic.co/guide/en/elasticsearch/reference/5.5/indices-analyze.html), use request body instead. [#1370](https://github.com/ruflin/Elastica/pull/1370)
- [Mapper Attachment plugin has been removed](https://github.com/elastic/elasticsearch/pull/20416). Use the Ingest-attachment plugin and attachment processors with a pipeline to ingest new documents. [#1375](https://github.com/ruflin/Elastica/pull/1375)

### Bugfixes
- Enforce [Content-Type requirement on the layer Rest](https://github.com/elastic/elasticsearch/pull/23146), a [PR on Elastica #1301](https://github.com/ruflin/Elastica/issues/1301) solved it (it has been implemented only in the HTTP Transport), but it was not implemented in the Guzzle Transport. [#1349](https://github.com/ruflin/Elastica/pull/1349)
Expand Down
166 changes: 0 additions & 166 deletions test/Elastica/IndexTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -118,172 +118,6 @@ public function testParent()
$this->assertEquals(['title' => 'Foo bar'], $resultSet->current()->getData());
}

/**
* @group functional
*/
public function testAddPdfFile()
{
    // Skipped for ES6 (see the skip message). markTestSkipped() aborts the
    // test, so the statements below only record the former flow: map a field
    // as "attachment", index a PDF through it and search the result.
    $this->markTestSkipped('ES6 update: use ingest attachment : No handler for type [attachment] declared on field [file]');

    $settings = ['index' => ['number_of_shards' => 1, 'number_of_replicas' => 0]];
    $properties = [
        'file' => ['type' => 'attachment'],
        'text' => ['type' => 'text'],
    ];

    $index = $this->_createIndex();
    $type = new Type($index, 'test');

    $index->create($settings, true);
    $type->setMapping($properties);

    // First document: the PDF plus a plain text field.
    $pdfDocument = new Document(1);
    $pdfDocument->addFile('file', BASE_PATH.'/data/test.pdf', 'application/pdf');
    $pdfDocument->set('text', 'basel world');
    $type->addDocument($pdfDocument);

    // Second document: text only.
    $textDocument = new Document(2);
    $textDocument->set('text', 'running in basel');
    $type->addDocument($textDocument);

    $index->forcemerge();

    // Search term => expected number of hits, checked in this order.
    $expectedHits = [
        'xodoa' => 1,
        'basel' => 2,
        'ruflin' => 1,  // Author is ruflin
        'guschti' => 0, // String does not exist in file
    ];

    foreach ($expectedHits as $term => $hits) {
        $this->assertEquals($hits, $type->search($term)->count());
    }
}

/**
* @group functional
*/
public function testAddPdfFileContent()
{
    // Skipped for ES6 (see the skip message). markTestSkipped() aborts the
    // test; what follows records the former flow, which attached the PDF
    // from its raw content instead of from a file path.
    $this->markTestSkipped('ES6 update: use ingest attachment : No handler for type [attachment] declared on field [file]');

    $settings = ['index' => ['number_of_shards' => 1, 'number_of_replicas' => 0]];
    $properties = [
        'file' => ['type' => 'attachment'],
        'text' => ['type' => 'text'],
    ];

    $index = $this->_createIndex();
    $type = new Type($index, 'test');

    $index->create($settings, true);
    $type->setMapping($properties);

    // First document: PDF bytes read by the test itself, plus a text field.
    $pdfDocument = new Document(1);
    $pdfBytes = file_get_contents(BASE_PATH.'/data/test.pdf');
    $pdfDocument->addFileContent('file', $pdfBytes);
    $pdfDocument->set('text', 'basel world');
    $type->addDocument($pdfDocument);

    // Second document: text only.
    $textDocument = new Document(2);
    $textDocument->set('text', 'running in basel');
    $type->addDocument($textDocument);

    $index->forcemerge();

    // Search term => expected number of hits, checked in this order.
    $expectedHits = [
        'xodoa' => 1,
        'basel' => 2,
        'ruflin' => 1,  // Author is ruflin
        'guschti' => 0, // String does not exist in file
    ];

    foreach ($expectedHits as $term => $hits) {
        $this->assertEquals($hits, $type->search($term)->count());
    }
}

/**
* @group functional
*/
public function testAddWordxFile()
{
    // Skipped for ES6 (see the skip message). markTestSkipped() aborts the
    // test; the remainder records the former .docx flow through the
    // "attachment" field type.
    $this->markTestSkipped('ES6 update: use ingest attachment : No handler for type [attachment] declared on field [file]');

    $settings = ['index' => ['number_of_shards' => 1, 'number_of_replicas' => 0]];
    $properties = [
        'file' => ['type' => 'attachment'],
        'text' => ['type' => 'text'],
    ];

    $index = $this->_createIndex();
    $type = new Type($index, 'content');

    $index->create($settings, true);
    $type->setMapping($properties);

    // First document: the Word file plus a text field.
    $wordDocument = new Document(1);
    $wordDocument->addFile('file', BASE_PATH.'/data/test.docx');
    $wordDocument->set('text', 'basel world');
    $type->addDocument($wordDocument);

    $index->forcemerge();
    $index->refresh();

    // Second document: text only.
    $textDocument = new Document(2);
    $textDocument->set('text', 'running in basel');
    $type->addDocument($textDocument);

    $index->forcemerge();
    $index->refresh();

    // Search term => expected number of hits, checked in this order.
    $expectedHits = [
        'basel' => 2,
        'ruflin' => 0,
        'Xodoa' => 1,
    ];

    foreach ($expectedHits as $term => $hits) {
        $this->assertEquals($hits, $type->search($term)->count());
    }
}

/**
* @group functional
*/
public function testExcludeFileSource()
{
    // Skipped for ES6 (see the skip message). markTestSkipped() aborts the
    // test; the remainder records the former check that a field listed in
    // the mapping's source "excludes" is absent from the fetched document.
    $this->markTestSkipped('ES6 update: use ingest attachment : No handler for type [attachment] declared on field [file]');

    $settings = ['index' => ['number_of_shards' => 1, 'number_of_replicas' => 0]];
    $properties = [
        'file' => ['type' => 'attachment'],
        'text' => ['type' => 'text', 'store' => true],
        'title' => ['type' => 'text', 'store' => true],
    ];

    $index = $this->_createIndex();
    $type = new Type($index, 'content');

    $mapping = Mapping::create($properties);
    $mapping->setSource(['excludes' => ['file']]);
    $mapping->setType($type);

    $index->create($settings, true);
    $type->setMapping($mapping);

    $id = 1;
    $expectedText = 'Basel World';
    $expectedTitle = 'No Title';

    $document = new Document($id);
    $document->addFile('file', BASE_PATH.'/data/test.docx');
    $document->set('text', $expectedText);
    $document->set('title', $expectedTitle);
    $type->addDocument($document);

    // Optimization necessary, as otherwise source still in realtime get
    $index->forcemerge();

    $fetched = $type->getDocument($id)->getData();

    $this->assertEquals($fetched['title'], $expectedTitle);
    $this->assertEquals($fetched['text'], $expectedText);
    $this->assertFalse(isset($fetched['file']));
}

/**
* @group functional
* @expectedException \Elastica\Exception\ResponseException
Expand Down
194 changes: 194 additions & 0 deletions test/Elastica/Processor/AttachmentTest.php
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
<?php
namespace Elastica\Test\Processor;

use Elastica\Bulk;
use Elastica\Document;
use Elastica\Processor\Attachment;
use Elastica\Test\BasePipeline as BasePipelineTest;
use Elastica\Type;

class AttachmentTest extends BasePipelineTest
{
Expand Down Expand Up @@ -45,4 +48,195 @@ public function testAttachmentWithNonDefaultOptions()

$this->assertEquals($expected, $processor->toArray());
}

/**
* @group functional
*/
public function testAttachmentAddPdf()
{
    // Registers a pipeline holding a single Attachment processor on the
    // "data" field, bulk-indexes a PDF document and a text document with
    // that pipeline selected, then checks the hit count for several terms.
    $pipelineId = 'my_custom_pipeline_attachment';

    $pipeline = $this->_createPipeline($pipelineId, 'pipeline for Attachment');
    $pipeline->addProcessor(new Attachment('data'));
    $pipeline->create();

    $index = $this->_createIndex();
    $type = $index->getType('bulk_test');

    // Document without an explicit id, carrying the PDF in "data".
    $pdfDocument = new Document(null);
    $pdfDocument->addFile('data', BASE_PATH.'/data/test.pdf');

    // Document with an empty "data" field and plain text only.
    $textDocument = new Document(2, ['data' => '', 'text' => 'test running in basel']);

    $bulk = new Bulk($index->getClient());
    $bulk->setIndex($index);
    $bulk->setType($type);
    $bulk->addDocuments([$pdfDocument, $textDocument]);
    // The pipeline is chosen through the "pipeline" request parameter.
    $bulk->setRequestParam('pipeline', $pipelineId);

    $bulk->send();
    $index->refresh();

    // Search term => expected number of hits, checked in this order.
    $expectedHits = [
        'xodoa' => 1,
        'test' => 2,
        'ruflin' => 1,  // Author is ruflin
        'guschti' => 0, // String does not exist in file
    ];

    foreach ($expectedHits as $term => $hits) {
        $this->assertEquals($hits, $type->search($term)->count());
    }
}

/**
* @group functional
*/
public function testAttachmentAddPdfFileContent()
{
    // Same scenario as the path-based PDF test, but the PDF is attached from
    // its raw content via Document::addFileContent(). The previous version
    // called addFile() here, so the content-based API was never exercised
    // after the mapper-attachments tests were removed.
    $pipelineName = 'my_custom_pipeline_attachment';

    $attachment = new Attachment('data');
    $pipeline = $this->_createPipeline($pipelineName, 'pipeline for Attachment');
    $pipeline->addProcessor($attachment);
    $pipeline->create();

    $index = $this->_createIndex();
    $type = $index->getType('bulk_test');

    $bulk = new Bulk($index->getClient());
    $bulk->setIndex($index);
    $bulk->setType($type);

    // Document without an explicit id: PDF content in "data" plus a text field.
    $doc1 = new Document(null);
    $doc1->addFileContent('data', file_get_contents(BASE_PATH.'/data/test.pdf'));
    $doc1->set('text', 'basel world');

    // Text-only document. Built directly with its final "text" value instead
    // of setting one value in the constructor and overwriting it right after.
    $doc2 = new Document(2, ['data' => '', 'text' => 'running in basel']);

    $bulk->addDocuments([
        $doc1, $doc2,
    ]);
    // The pipeline is chosen through the "pipeline" request parameter.
    $bulk->setRequestParam('pipeline', $pipelineName);

    $bulk->send();
    $index->forcemerge();

    $resultSet = $type->search('xodoa');
    $this->assertEquals(1, $resultSet->count());

    $resultSet = $type->search('basel');
    $this->assertEquals(2, $resultSet->count());

    // Author is ruflin
    $resultSet = $type->search('ruflin');
    $this->assertEquals(1, $resultSet->count());

    // String does not exist in file
    $resultSet = $type->search('guschti');
    $this->assertEquals(0, $resultSet->count());
}

/**
* @group functional
*/
public function testAddWordxFile()
{
    // Registers a pipeline holding a single Attachment processor on the
    // "data" field, bulk-indexes a .docx document and a text document with
    // that pipeline selected, then checks the hit count for several terms.
    $pipelineId = 'my_custom_pipeline_attachment';

    $pipeline = $this->_createPipeline($pipelineId, 'pipeline for Attachment');
    $pipeline->addProcessor(new Attachment('data'));
    $pipeline->create();

    $index = $this->_createIndex();
    $type = $index->getType('bulk_test');

    // Document without an explicit id: Word file in "data" plus a text field.
    $wordDocument = new Document(null);
    $wordDocument->addFile('data', BASE_PATH.'/data/test.docx');
    $wordDocument->set('text', 'basel world');

    // Document with an empty "data" field and plain text only.
    $textDocument = new Document(2, ['data' => '', 'text' => 'test running in basel']);

    $bulk = new Bulk($index->getClient());
    $bulk->setIndex($index);
    $bulk->setType($type);
    $bulk->addDocuments([$wordDocument, $textDocument]);
    // The pipeline is chosen through the "pipeline" request parameter.
    $bulk->setRequestParam('pipeline', $pipelineId);

    $bulk->send();
    $index->refresh();

    // Search term => expected number of hits, checked in this order.
    $expectedHits = [
        'basel' => 2,
        'ruflin' => 0,
        'Xodoa' => 1,
        'guschti' => 0, // String does not exist in file
    ];

    foreach ($expectedHits as $term => $hits) {
        $this->assertEquals($hits, $type->search($term)->count());
    }
}

/**
* @group functional
*/
public function testExcludeFileSource()
{
    // Checks that a field listed in the mapping's source "excludes" is not
    // returned when the document is fetched, while the other fields are.
    // The document is indexed through a pipeline with an Attachment
    // processor on the "data" field.
    $pipelineName = 'my_custom_pipeline_attachment';

    $attachment = new Attachment('data');
    $pipeline = $this->_createPipeline($pipelineName, 'pipeline for Attachment');
    $pipeline->addProcessor($attachment);
    $pipeline->create();

    $indexMapping = [
        'data' => ['type' => 'text'],
        'text' => ['type' => 'text', 'store' => true],
        'title' => ['type' => 'text', 'store' => true],
    ];

    $indexParams = ['index' => ['number_of_shards' => 1, 'number_of_replicas' => 0]];

    $index = $this->_createIndex();
    $type = new Type($index, 'content');

    $mapping = Type\Mapping::create($indexMapping);
    $mapping->setSource(['excludes' => ['data']]);

    $mapping->setType($type);

    $index->create($indexParams, true);
    $type->setMapping($mapping);

    $docId = 1;
    $text = 'Basel World';
    $title = 'No Title';

    $doc1 = new Document($docId);
    $doc1->set('text', $text);
    $doc1->set('title', $title);
    $doc1->addFile('data', BASE_PATH.'/data/test.docx');

    $bulk = new Bulk($index->getClient());
    $bulk->setIndex($index);
    $bulk->setType($type);

    $bulk->addDocuments([
        $doc1,
    ]);
    // The pipeline is chosen through the "pipeline" request parameter.
    $bulk->setRequestParam('pipeline', $pipelineName);

    $bulk->send();

    // Optimization necessary, as otherwise source still in realtime get
    $index->forcemerge();

    $data = $type->getDocument($docId)->getData();

    // Expected value first, so a failure reports expected/actual correctly.
    $this->assertEquals($title, $data['title']);
    $this->assertEquals($text, $data['text']);

    // The excluded field is "data". The former check looked at "file", a key
    // this test never writes, so it could not fail.
    $this->assertArrayNotHasKey('data', $data);
}
}

0 comments on commit ca47cd2

Please sign in to comment.