Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FileFetcher should always reflect use-local-file config #4035

Merged
merged 38 commits into from
Nov 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
af9a9e1
basic first start
paul-m Oct 3, 2023
586bf99
Merge branch '2.x' into importinfo-refactor
paul-m Oct 4, 2023
70e470f
Merge branch '2.x' into 17093-importinfo-refactor
paul-m Oct 6, 2023
6868b75
Merge branch '2.x' into 17093-importinfo-refactor
paul-m Oct 6, 2023
6ef337c
file fetcher has a hashed ID now
paul-m Oct 6, 2023
8cf51e3
un-cs
paul-m Oct 6, 2023
4011da6
big test gets bigger
paul-m Oct 6, 2023
cd6b3f9
Merge branch '2.x' into 17093-importinfo-refactor
paul-m Oct 8, 2023
50b8660
foo
paul-m Oct 10, 2023
28bc032
subclass FileFetcher
paul-m Oct 12, 2023
889b66c
even more read-only filefetcher
paul-m Oct 13, 2023
e53a694
use filefetcherfactory when getting a filefetcher.
paul-m Oct 14, 2023
4207024
not so great
paul-m Oct 15, 2023
76eca86
back to basics
paul-m Oct 15, 2023
0a94200
revert mapping changes to datastoreservice
paul-m Oct 16, 2023
bc832c1
dictionaryenforcertest needs help
paul-m Oct 16, 2023
bb0f008
still some to go
paul-m Oct 16, 2023
c6ff8bc
more whimsy
paul-m Oct 17, 2023
b3cdf1c
Merge branch '2.x' into 17093-importinfo-refactor
paul-m Oct 24, 2023
8ac73f5
some cleanup
paul-m Oct 24, 2023
4603181
cs
paul-m Oct 24, 2023
614db67
more phail
paul-m Oct 25, 2023
3c4e564
passing
paul-m Oct 25, 2023
317c248
cleanup
paul-m Oct 25, 2023
f574ae0
search_api
paul-m Oct 25, 2023
2fe7b13
revert dkanfilefetcher
paul-m Oct 25, 2023
fffbbf5
trying to make ci happy
paul-m Oct 26, 2023
43b3d00
case-sensitive
paul-m Oct 26, 2023
a691824
back to versioned release of procrastinator
paul-m Oct 27, 2023
b0f24a0
revert some of dictionaryenforcertest
paul-m Oct 27, 2023
c346dfe
refactor for cognitive complexity
paul-m Oct 29, 2023
1b7d852
new release of filefetcher
paul-m Oct 30, 2023
415a643
cleanup
paul-m Oct 30, 2023
8b4967f
revert search_api constraint
paul-m Oct 30, 2023
e8f0c0c
Merge branch '2.x' into 17093-importinfo-refactor
paul-m Nov 1, 2023
f4053fe
Document DkanFileFetcher
dafeder Nov 3, 2023
d4f6a44
changed test name
paul-m Nov 3, 2023
f4b7bb2
CS
paul-m Nov 3, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"fmizzell/maquina": "^1.1.1",
"getdkan/contracts": "^1.0.0",
"getdkan/csv-parser": "^1.3.0",
"getdkan/file-fetcher" : "^4.2.1",
"getdkan/file-fetcher" : "^4.2.2",
"getdkan/harvest": "^1.0.0",
"getdkan/procrastinator": "^4.0.0",
"getdkan/rooted-json-data": "^0.1",
Expand Down
10 changes: 5 additions & 5 deletions modules/common/src/DataResource.php
Original file line number Diff line number Diff line change
Expand Up @@ -395,16 +395,16 @@ private static function getDistribution($identifier) {
*/
public function generateChecksum() {
try {
$this->checksum = md5_file($this->filePath);
$this->checksum = md5_file($this->getFilePath());
}
catch (\Throwable $throwable) {
// Re-throw the throwable if we're not in the perspective of a local file
// that doesn't exist. It's valid for a local file to not exist in some
// circumstances.
if (
$this->getPerspective() !== ResourceLocalizer::LOCAL_FILE_PERSPECTIVE ||
!file_exists($this->filePath)
) {
if (!(
$this->getPerspective() === ResourceLocalizer::LOCAL_FILE_PERSPECTIVE &&
!file_exists($this->getFilePath())
)) {
throw $throwable;
}
}
Expand Down
76 changes: 76 additions & 0 deletions modules/common/src/FileFetcher/DkanFileFetcher.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
<?php

namespace Drupal\common\FileFetcher;

use FileFetcher\FileFetcher;

/**
* Allows FileFetcher to be reconfigured for using existing local files.
*
* This DKAN-specific extension of the FileFetcher (which comes from an
* external library) applies the DKAN configuration
* common.settings.always_use_existing_local_perspective
* when selecting the processor. The configuration itself is retrieved
* in FileFetcherFactory and passed to DkanFileFetcher on getInstance().
*/
class DkanFileFetcher extends FileFetcher {

/**
* Tell this file fetcher whether to use local files if they exist.
*
* @param bool $use_local_file
* (optional) Whether to use the local file. If TRUE, we'll use the file
* processor that prefers to use local files. Defaults to TRUE.
*
* @return self
* Fluent interface.
*
* @see https://dkan.readthedocs.io/en/2.x/user-guide/guide_local_files.html
*/
public function setAlwaysUseExistingLocalPerspective(bool $use_local_file = TRUE) : self {
// @todo Re-computing the custom processor classes should be in another
// method that is in the parent class.
if ($use_local_file) {
$this->dkanUseLocalFileProcessor();
}
else {
$this->dkanUseDefaultFileProcessor();
}
return $this;
}

/**
* Configure the processor to respect the local file if it already exists.
*/
protected function dkanUseLocalFileProcessor() {
// Set the state/config to use our remote class.
$this->setProcessors(['processors' => [FileFetcherRemoteUseExisting::class]]);
$this->setStateProperty('processor', FileFetcherRemoteUseExisting::class);
// At this very early stage, update the status if the file already exists.
/** @var \Drupal\common\FileFetcher\FileFetcherRemoteUseExisting $processor */
$processor = $this->getProcessor();
$existing_status = $processor->discoverStatusForExistingFile(
$this->getState(),
$this->getResult()
);
$this->setState($existing_status['state']);
}

/**
* Configure the processor to use its default behavior.
*/
protected function dkanUseDefaultFileProcessor() {
// @todo This ignores any other custom processor classes that might have
// been configured. Improve this situation.
$this->customProcessorClasses = [];
$state = $this->getState();
foreach ($this->getProcessors() as $processor) {
if ($processor->isServerCompatible($state)) {
$state['processor'] = get_class($processor);
break;
}
}
$this->getResult()->setData(json_encode($state));
}

}
13 changes: 7 additions & 6 deletions modules/common/src/FileFetcher/FileFetcherFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,17 @@ public function __construct(JobStoreFactory $jobStoreFactory, ConfigFactoryInter
*/
public function getInstance(string $identifier, array $config = []) {
$config = array_merge($this->configDefault, $config);
// Use our bespoke file fetcher class that uses the existing file if we're
// configured to do so.
if ($this->dkanConfig->get('always_use_existing_local_perspective') ?? FALSE) {
$config['processors'] = [FileFetcherRemoteUseExisting::class];
}
return FileFetcher::get(
$file_fetcher = DkanFileFetcher::get(
$identifier,
$this->jobStoreFactory->getInstance(FileFetcher::class),
$config
);
// Inject our special configuration into the file fetcher, so it can use
// local files rather than re-downloading them.
$file_fetcher->setAlwaysUseExistingLocalPerspective(
(bool) $this->dkanConfig->get('always_use_existing_local_perspective')
);
return $file_fetcher;
}

}
23 changes: 21 additions & 2 deletions modules/common/src/FileFetcher/FileFetcherRemoteUseExisting.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,31 @@ class FileFetcherRemoteUseExisting extends Remote {
*/
public function copy(array $state, Result $result, int $timeLimit = PHP_INT_MAX): array {
// Always short-circuit if the file already exists.
$existing_status = $this->discoverStatusForExistingFile($state, $result);
if ($existing_status['result']->getStatus() === Result::DONE) {
return $existing_status;
}
return parent::copy($state, $result, $timeLimit);
}

/**
* Check for the existing file, setting state and result appropriately.
*
* @param array $state
* State.
* @param \Procrastinator\Result $result
* Result object.
*
* @return array
* Array of $state and $result. Appropriate for return from
* ProcessorInterface::copy.
*/
public function discoverStatusForExistingFile(array $state, Result $result): array {
if (file_exists($state['destination'])) {
$state['total_bytes_copied'] = $state['total_bytes'] = $this->getFilesize($state['destination']);
$result->setStatus(Result::DONE);
return ['state' => $state, 'result' => $result];
}
return parent::copy($state, $result, $timeLimit);
return ['state' => $state, 'result' => $result];
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?php

namespace Drupal\Tests\common\Unit\FileFetcher;

use Drupal\common\FileFetcher\FileFetcherRemoteUseExisting;
use org\bovigo\vfs\vfsStream;
use PHPUnit\Framework\TestCase;
use Procrastinator\Result;

/**
* @covers \Drupal\common\FileFetcher\FileFetcherRemoteUseExisting
* @coversDefaultClass \Drupal\common\FileFetcher\FileFetcherRemoteUseExisting
*/
class FileFetcherRemoteUseExistingTest extends TestCase {

/**
* @covers ::copy
*/
public function testCopy() {
$result = new Result();
$remote = new FileFetcherRemoteUseExisting();

// Set up a file system.
$root = vfsStream::setup();
$file = $root->url() . '/nine_bytes.csv';
$file_contents = '0123,4567';

// Config for processor.
$state = [
'destination' => $file,
'source' => 'https://example.com/bad_path.csv',
];

// Run it for error condition because the file doesn't already exist, so it
// will try to copy the source, but the source URL is bad.
$result_state = $remote->copy($state, $result);
$this->assertEquals(Result::ERROR, $result_state['result']->getStatus());

// Add existing file contents.
file_put_contents($file, $file_contents);

// Run it for re-use of existing file. This will succeed because the file
// is there.
$result_state = $remote->copy($state, $result);

$this->assertEquals(Result::DONE, $result_state['result']->getStatus());
$this->assertEquals(9, $result_state['state']['total_bytes']);
$this->assertEquals(9, $result_state['state']['total_bytes_copied']);
$this->assertStringEqualsFile($file, $file_contents);
}

}
109 changes: 72 additions & 37 deletions modules/datastore/tests/src/Functional/DictionaryEnforcerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,33 @@

use Drupal\datastore\Controller\ImportController;
use Drupal\metastore\DataDictionary\DataDictionaryDiscovery;
use Drupal\Tests\BrowserTestBase;
use Drupal\Tests\common\Traits\CleanUp;
use Drupal\Tests\common\Traits\GetDataTrait;
use Drupal\Tests\metastore\Unit\MetastoreServiceTest;

use RootedData\RootedJsonData;
use Symfony\Component\HttpFoundation\Request;
use weitzman\DrupalTestTraits\ExistingSiteBase;

/**
* DictionaryEnforcer QueueWorker test.
*
* @package Drupal\Tests\datastore\Functional
* @group datastore
* @group functional
* @group btb
*/
class DictionaryEnforcerTest extends ExistingSiteBase {
class DictionaryEnforcerTest extends BrowserTestBase {

use GetDataTrait, CleanUp;

protected $defaultTheme = 'stark';

protected static $modules = [
'datastore',
'node',
];

/**
* Uploaded resource file destination.
*
Expand Down Expand Up @@ -84,7 +94,7 @@ class DictionaryEnforcerTest extends ExistingSiteBase {
*
* @var \Drupal\datastore\Controller\ImportController
*/
protected $webServiceApi;
protected $importController;

/**
* External URL for the fixture CSV file.
Expand All @@ -100,30 +110,25 @@ public function setUp(): void {
parent::setUp();

// Initialize services.
$this->cron = \Drupal::service('cron');
$this->metastore = \Drupal::service('dkan.metastore.service');
$this->uuid = \Drupal::service('uuid');
$this->cron = $this->container->get('cron');
$this->metastore = $this->container->get('dkan.metastore.service');
$this->uuid = $this->container->get('uuid');
$this->validMetadataFactory = MetastoreServiceTest::getValidMetadataFactory($this);
$this->webServiceApi = ImportController::create(\Drupal::getContainer());
$this->datasetStorage = \Drupal::service('dkan.metastore.storage')
$this->importController = ImportController::create(\Drupal::getContainer());
$this->datasetStorage = $this->container->get('dkan.metastore.storage')
->getInstance('dataset');
// Copy resource file to uploads directory.
/** @var \Drupal\Core\File\FileSystemInterface $file_system */
$file_system = \Drupal::service('file_system');
$file_system = $this->container->get('file_system');
$upload_path = $file_system->realpath(self::UPLOAD_LOCATION);
$file_system->prepareDirectory($upload_path, FileSystemInterface::CREATE_DIRECTORY);
$file_system->copy(self::TEST_DATA_PATH . self::RESOURCE_FILE, $upload_path, FileSystemInterface::EXISTS_REPLACE);
// Create resource URL.
$this->resourceUrl = \Drupal::service('stream_wrapper_manager')
$this->resourceUrl = $this->container->get('stream_wrapper_manager')
->getViaUri(self::UPLOAD_LOCATION . self::RESOURCE_FILE)
->getExternalUrl();
}

public function tearDown(): void {
parent::tearDown();
$this->removeAllMappedFiles();
}

/**
* Test dictionary enforcement.
*/
Expand Down Expand Up @@ -177,40 +182,52 @@ public function testDictionaryEnforcement(): void {
];
$data_dict = $this->validMetadataFactory->get($this->getDataDictionary($fields, $indexes, $dict_id), 'data-dictionary');
// Create data-dictionary.
$this->metastore->post('data-dictionary', $data_dict);
$this->metastore->publish('data-dictionary', $dict_id);
$this->assertEquals(
$dict_id,
$this->metastore->post('data-dictionary', $data_dict)
);
// Publish should return FALSE, because the node was already published.
$this->assertFalse($this->metastore->publish('data-dictionary', $dict_id));

// Set global data-dictinary in metastore config.
$metastore_config = \Drupal::configFactory()
->getEditable('metastore.settings');
$metastore_config->set('data_dictionary_mode', DataDictionaryDiscovery::MODE_SITEWIDE);
$metastore_config->set('data_dictionary_sitewide', $dict_id);
$metastore_config->save();
$metastore_config = $this->config('metastore.settings');
$metastore_config->set('data_dictionary_mode', DataDictionaryDiscovery::MODE_SITEWIDE)
->set('data_dictionary_sitewide', $dict_id)
->save();

// Build dataset.
$dataset_id = $this->uuid->generate();
$dataset = $this->validMetadataFactory->get($this->getDataset($dataset_id, 'Test ' . $dataset_id, [$this->resourceUrl], TRUE), 'dataset');
$this->assertInstanceOf(
RootedJsonData::class,
$dataset = $this->validMetadataFactory->get(
$this->getDataset($dataset_id, 'Test ' . $dataset_id, [$this->resourceUrl], TRUE),
'dataset'
)
);
// Create dataset.
$this->metastore->post('dataset', $dataset);
$this->metastore->publish('dataset', $dataset_id);
$this->assertEquals($dataset_id, $this->metastore->post('dataset', $dataset));
// Publish should return FALSE, because the node was already published.
$this->assertFalse($this->metastore->publish('dataset', $dataset_id));

// Run cron to import dataset into datastore.
$this->cron->run();
// Run cron to apply data-dictionary.
$this->cron->run();
// Run queue items to perform the import.
$this->runQueues(['datastore_import', 'post_import']);

// Retrieve dataset distribution ID.
$dataset = $this->metastore->get('dataset', $dataset_id);
$dist_id = $dataset->{'$["%Ref:distribution"][0].identifier'};
// Build mock request.
$request = Request::create('http://blah/api');
$this->assertInstanceOf(
RootedJsonData::class,
$dataset = $this->metastore->get('dataset', $dataset_id)
);
$this->assertNotEmpty(
$dist_id = $dataset->{'$["%Ref:distribution"][0].identifier'} ?? NULL
);
// Retrieve schema for dataset resource.
$response = $this->webServiceApi->summary($dist_id, $request);
$response = $this->importController->summary(
$dist_id,
Request::create('http://blah/api')
);
$this->assertEquals(200, $response->getStatusCode(), $response->getContent());
$result = json_decode($response->getContent(), TRUE);

// Clean up after ourselves, before performing the assertion.
$this->metastore->delete('dataset', $dataset_id);

// Validate schema.
$this->assertEquals([
'numOfColumns' => 6,
Expand Down Expand Up @@ -266,4 +283,22 @@ public function testDictionaryEnforcement(): void {
], $result);
}

/**
* Process queues in a predictable order.
*/
private function runQueues(array $relevantQueues = []) {
/** @var \Drupal\Core\Queue\QueueWorkerManager $queueWorkerManager */
$queueWorkerManager = \Drupal::service('plugin.manager.queue_worker');
/** @var \Drupal\Core\Queue\QueueFactory $queueFactory */
$queueFactory = $this->container->get('queue');
foreach ($relevantQueues as $queueName) {
$worker = $queueWorkerManager->createInstance($queueName);
$queue = $queueFactory->get($queueName);
while ($item = $queue->claimItem()) {
$worker->processItem($item->data);
$queue->deleteItem($item);
}
}
}

}
Loading