Skip to content

Commit

Permalink
WCMS-16115: Apply HTML filtering to only properties that are configured to allow HTML. (#4013)
Browse files Browse the repository at this point in the history
  • Loading branch information
jastraat authored Sep 18, 2023
1 parent 6d7af6f commit 447a605
Show file tree
Hide file tree
Showing 16 changed files with 342 additions and 33 deletions.
2 changes: 1 addition & 1 deletion cypress/integration/09_admin_links.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ context('Administration pages', () => {
it('I should see a link for the dataset properties configuration', () => {
cy.get('.toolbar-icon-system-admin-dkan').contains('DKAN').next('.toolbar-menu').then($el=>{
cy.wrap($el).invoke('show')
cy.wrap($el).contains('Metastore referencer')
cy.wrap($el).contains('Metastore configuration')
})
cy.visit(baseurl + "/admin/dkan/properties")
cy.get('.option').should('contain.text', 'Distribution (distribution)')
Expand Down
1 change: 1 addition & 0 deletions modules/metastore/config/install/metastore.settings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ property_list:
'spatial': '0'
'temporal': '0'
'isPartOf': '0'
html_allowed_properties: []
resource_perspective_display: source
8 changes: 7 additions & 1 deletion modules/metastore/config/schema/metastore.schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ metastore.settings:
sequence:
type: string
label: 'Property'
html_allowed_properties:
type: sequence
label: 'HTML Allowed Properties'
sequence:
type: string
label: 'Property'
resource_perspective_display:
type: string
label: 'Resource download url display'
label: 'Resource download url display'
4 changes: 2 additions & 2 deletions modules/metastore/metastore.links.menu.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
dkan.metastore.config_properties:
title: Metastore referencer
title: Metastore configuration
route_name: dkan.metastore.config_properties
description: Configure dataset properties for referencing an API endpoint.
description: Configure dataset properties for referencing sub-schemas and for HTML sanitization.
parent: system.admin_dkan
weight: 14

Expand Down
12 changes: 12 additions & 0 deletions modules/metastore/metastore.post_update.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?php

/**
* @file
* Post update functions for Metastore module.
*/

/**
 * Trigger clear cache to pick up admin menu update.
 *
 * The function body is intentionally empty: it performs no work of its own.
 * NOTE(review): presumably it exists only so that running database updates
 * has a pending post-update to execute, and the cache rebuild that follows
 * is what refreshes the renamed admin menu link — confirm against Drupal's
 * hook_post_update_NAME() behaviour.
 *
 * @param mixed $sandbox
 *   Passed by reference and defaulting to NULL; not read or modified here.
 */
function metastore_post_update_menu_clear_cache(&$sandbox = NULL) {
  // Intentionally left empty; this update has no logic to run.
}
2 changes: 1 addition & 1 deletion modules/metastore/metastore.routing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ dkan.metastore.config_properties:
path: '/admin/dkan/properties'
defaults:
_form: '\Drupal\metastore\Form\DkanDataSettingsForm'
_title: 'Metastore referencer configuration'
_title: 'Metastore configuration'
requirements:
_permission: 'access administration pages'
options:
Expand Down
1 change: 1 addition & 0 deletions modules/metastore/metastore.services.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ services:
class: \Drupal\metastore\Storage\DataFactory
arguments:
- '@entity_type.manager'
- '@config.factory'

dkan.metastore.referencer:
class: \Drupal\metastore\Reference\Referencer
Expand Down
26 changes: 19 additions & 7 deletions modules/metastore/src/Form/DkanDataSettingsForm.php
Original file line number Diff line number Diff line change
Expand Up @@ -80,20 +80,31 @@ public function getFormId() {
*/
public function buildForm(array $form, FormStateInterface $form_state) {
$config = $this->config('metastore.settings');
$options = $this->schemaHelper->retrieveSchemaProperties('dataset');
$default_values = $config->get('property_list');

$form['description'] = [
'#markup' => $this->t(
'Select properties from the dataset schema to be available as individual objects.
Each property will be assigned a unique identifier in addition to its original schema value.'
'Configure the metastore settings.'
),
];

$form['html_allowed_properties'] = [
'#type' => 'checkboxes',
'#title' => $this->t('Properties that allow HTML'),
'#description' => $this->t('Metadata properties that may contain HTML elements.'),
'#options' => $this->schemaHelper->retrieveStringSchemaProperties(),
'#default_value' => $config->get('html_allowed_properties') ?:
['dataset_description', 'distribution_description'],
];

$form['property_list'] = [
'#type' => 'checkboxes',
'#title' => $this->t('Dataset properties'),
'#options' => $options,
'#default_value' => $default_values,
'#title' => $this->t('Dataset properties to be stored as separate entities; use caution'),
'#description' => $this->t('Select properties from the dataset schema to be available as individual objects.
Each property will be assigned a unique identifier in addition to its original schema value.'),
'#options' => $this->schemaHelper->retrieveSchemaProperties(),
'#default_value' => $config->get('property_list'),
];

return parent::buildForm($form, $form_state);
}

Expand All @@ -107,6 +118,7 @@ public function submitForm(array &$form, FormStateInterface $form_state) {

$this->config('metastore.settings')
->set('property_list', $form_state->getValue('property_list'))
->set('html_allowed_properties', $form_state->getValue('html_allowed_properties'))
->save();

// Rebuild routes, without clearing all caches.
Expand Down
86 changes: 85 additions & 1 deletion modules/metastore/src/SchemaPropertiesHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public function __construct(SchemaRetriever $schemaRetriever) {
}

/**
* Retrieve schema properties.
* Retrieve dataset schema properties.
*
* @return array
* List of schema properties' title and description.
Expand All @@ -62,4 +62,88 @@ public function retrieveSchemaProperties(): array {
return $property_list;
}

/**
 * Collect the string properties of the dataset schema.
 *
 * Fetches the 'dataset' schema from the schema retriever, decodes it from
 * JSON into an object and passes its top-level "properties" member to
 * buildPropertyList().
 *
 * @return array
 *   The property list produced by buildPropertyList() for the dataset schema.
 */
public function retrieveStringSchemaProperties(): array {
  // Decode the raw JSON schema into an object tree.
  $dataset_schema = json_decode($this->schemaRetriever->retrieve('dataset'));
  $top_level_properties = $dataset_schema->properties;

  return $this->buildPropertyList($top_level_properties);
}

/**
 * Walk a JSON schema "properties" collection and gather its entries.
 *
 * Every name/definition pair found in $input is forwarded to
 * parseProperty(), which decides whether and how it is added to the list.
 *
 * @param object $input
 *   JSON Schema "properties" object to iterate over.
 * @param string $parent
 *   Name of the schema or property that owns $input; defaults to 'dataset'.
 * @param array $property_list
 *   Accumulator for the discovered properties. Taken by reference, so entries
 *   added here are also visible to the caller's variable.
 *
 * @return array
 *   The accumulated property list.
 *
 * @see https://json-schema.org/understanding-json-schema/reference/object.html#properties
 */
private function buildPropertyList($input, string $parent = 'dataset', array &$property_list = []): array {
  foreach ($input as $property_name => $definition) {
    $this->parseProperty($property_name, $definition, $parent, $property_list);
  }

  return $property_list;
}

/**
 * Inspect one JSON schema property and record it when it is a string.
 *
 * String properties are added to the list under the key "<parent>_<name>"
 * with a label of the form "<Parent>: <Title> (<name>)", or
 * "<Parent>: <Name>" when the property has no title. Any other typed
 * property is handed to parseNestedProperties() to look for children.
 *
 * @param string $name
 *   Property name.
 * @param mixed $property
 *   JSON schema "property" object.
 * @param string $parent
 *   The parent JSON Schema property of the current property.
 * @param array $property_list
 *   Array we're building of schema properties; modified by reference.
 */
private function parseProperty(string $name, $property, string $parent, array &$property_list) {
  // Ignore "@"-prefixed names and anything that is not an object with a type.
  $is_at_prefixed = strpos($name, '@') === 0;
  if ($is_at_prefixed || !is_object($property) || !isset($property->type)) {
    return;
  }

  // Non-strings (arrays and objects) may carry nested properties instead.
  if ($property->type != 'string') {
    $this->parseNestedProperties($name, $property, $property_list);
    return;
  }

  // Strings are listed directly, preferring the schema title for the label.
  if (isset($property->title)) {
    $label = $property->title . ' (' . $name . ')';
  }
  else {
    $label = ucfirst($name);
  }
  $property_list[$parent . '_' . $name] = ucfirst($parent) . ': ' . $label;
}

/**
 * Parse nested schema properties.
 *
 * Looks for child properties in two places: directly under the property's
 * "properties" member, or under "items->properties" when the property
 * describes a list of objects. Children are collected with
 * buildPropertyList(), using this property's name as their parent.
 *
 * @param string $name
 *   Property ID; becomes the parent prefix of any nested properties found.
 * @param object $property
 *   JSON Schema "property" object we're parsing.
 * @param array $property_list
 *   Array we're building of schema properties; modified by reference.
 */
private function parseNestedProperties(string $name, $property, array &$property_list = []) {
  // The type check must wrap only the value: gettype() of a comparison result
  // is always the non-empty string "boolean", which would make this condition
  // pass for any non-null "properties" value.
  if (isset($property->properties) && gettype($property->properties) == 'object') {
    $property_list = $this->buildPropertyList($property->properties, $name, $property_list);
  }
  // Otherwise fall back to the item definition's own properties, if any.
  elseif (isset($property->items) && gettype($property->items) == 'object' && isset($property->items->properties)) {
    $property_list = $this->buildPropertyList($property->items->properties, $name, $property_list);
  }
}

}
33 changes: 26 additions & 7 deletions modules/metastore/src/Storage/Data.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace Drupal\metastore\Storage;

use Drupal\common\LoggerTrait;
use Drupal\Core\Config\ConfigFactoryInterface;
use Drupal\Core\Entity\ContentEntityInterface;
use Drupal\Core\Entity\EntityPublishedInterface;
use Drupal\Core\Entity\EntityTypeManagerInterface;
Expand Down Expand Up @@ -84,13 +85,21 @@ abstract class Data implements MetastoreEntityStorageInterface {
*/
protected $schemaIdField;

/**
* The config factory.
*
* @var \Drupal\Core\Config\ConfigFactoryInterface
*/
protected $configFactory;

/**
* Constructor.
*/
public function __construct(string $schemaId, EntityTypeManagerInterface $entityTypeManager) {
public function __construct(string $schemaId, EntityTypeManagerInterface $entityTypeManager, ConfigFactoryInterface $config_factory) {
$this->entityTypeManager = $entityTypeManager;
$this->entityStorage = $this->entityTypeManager->getStorage($this->entityType);
$this->schemaId = $schemaId;
$this->configFactory = $config_factory;
}

/**
Expand Down Expand Up @@ -303,7 +312,7 @@ public function remove(string $uuid) {
public function store($data, string $uuid = NULL): string {
$data = json_decode($data);

$data = $this->filterHtml($data);
$data = $this->filterHtml($data, $this->schemaId);

$uuid = (!$uuid && isset($data->identifier)) ? $data->identifier : $uuid;

Expand Down Expand Up @@ -365,7 +374,7 @@ private function updateExistingEntity(ContentEntityInterface $entity, $data): ?s
private function createNewEntity(string $uuid, $data) {
$title = '';
if ($this->schemaId === 'dataset') {
$title = isset($data->title) ? $data->title : $data->name;
$title = $data->title ?? $data->name;
}
else {
$title = MetastoreService::metadataHash($data->data);
Expand Down Expand Up @@ -393,20 +402,30 @@ private function createNewEntity(string $uuid, $data) {
*
* @param mixed $input
* Unfiltered input.
* @param string $parent
* The parent schema of a given property.
*
* @return mixed
* Filtered output.
*/
private function filterHtml($input) {
// @todo find out if we still need it.
private function filterHtml($input, string $parent = 'dataset') {
$html_allowed = $this->configFactory->get('metastore.settings')->get('html_allowed_properties')
?: ['dataset_description', 'distribution_description'];
switch (gettype($input)) {
case "string":
return $this->htmlPurifier($input);

case "array":
case "object":
foreach ($input as &$value) {
$value = $this->filterHtml($value);
foreach ($input as $name => &$value) {
// Only apply filtering to properties that allow HTML.
if (in_array($parent . '_' . $name, $html_allowed)) {
$value = $this->filterHtml($value, $name);
}
// Nested properties; check using parent.
elseif ($name == 'data' && gettype($value) == 'object') {
$value = $this->filterHtml($value, $parent);
}
}
return $input;

Expand Down
13 changes: 11 additions & 2 deletions modules/metastore/src/Storage/DataFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace Drupal\metastore\Storage;

use Contracts\FactoryInterface;
use Drupal\Core\Config\ConfigFactoryInterface;
use Drupal\Core\Entity\EntityTypeManager;

/**
Expand All @@ -24,11 +25,19 @@ class DataFactory implements FactoryInterface {
*/
private $entityTypeManager;

/**
* The config factory.
*
* @var \Drupal\Core\Config\ConfigFactoryInterface
*/
protected $configFactory;

/**
* Constructor.
*/
public function __construct(EntityTypeManager $entityTypeManager) {
public function __construct(EntityTypeManager $entityTypeManager, ConfigFactoryInterface $config_factory) {
$this->entityTypeManager = $entityTypeManager;
$this->configFactory = $config_factory;
}

/**
Expand Down Expand Up @@ -79,7 +88,7 @@ private function getEntityTypeBySchema(string $schema_id) : string {
* Storage object.
*/
protected function createNodeInstance(string $identifier) {
return new NodeData($identifier, $this->entityTypeManager);
return new NodeData($identifier, $this->entityTypeManager, $this->configFactory);
}

/**
Expand Down
5 changes: 3 additions & 2 deletions modules/metastore/src/Storage/NodeData.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

namespace Drupal\metastore\Storage;

use Drupal\Core\Config\ConfigFactoryInterface;
use Drupal\Core\Entity\EntityTypeManagerInterface;

/**
Expand All @@ -12,14 +13,14 @@ class NodeData extends Data {
/**
* NodeData constructor.
*/
public function __construct(string $schemaId, EntityTypeManagerInterface $entityTypeManager) {
public function __construct(string $schemaId, EntityTypeManagerInterface $entityTypeManager, ConfigFactoryInterface $config_factory) {
$this->entityType = 'node';
$this->bundle = 'data';
$this->bundleKey = "type";
$this->labelKey = "title";
$this->schemaIdField = "field_data_type";
$this->metadataField = "field_json_metadata";
parent::__construct($schemaId, $entityTypeManager);
parent::__construct($schemaId, $entityTypeManager, $config_factory);
}

/**
Expand Down
Loading

0 comments on commit 447a605

Please sign in to comment.