Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Task/rdmp 254 ordering insert #2019

Draft
wants to merge 10 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [8.4.0] - Unreleased

- Add Ordering to Filters
- Add ability to optimise inserts for sequential keys during Data Load

## [8.3.1] - Unreleased

- Improve Performance of regenerating problems with child providers
Expand Down
8 changes: 7 additions & 1 deletion Rdmp.Core/CommandExecution/AtomicCommandFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,13 @@ public IEnumerable<IAtomicCommand> CreateCommands(object o)
var reservedTest = lmd.AllowReservedPrefix ? "Drop" : "Allow";
yield return new ExecuteCommandToggleAllowReservedPrefixForLoadMetadata(lmd)
{
OverrideCommandName=$"{reservedTest} Reserved Prefix Columns"
OverrideCommandName = $"{reservedTest} Reserved Prefix Columns",
SuggestedCategory = "Advanced"
};
// The label names the action the command will perform: switch the optimisation off when it is currently on, and on otherwise.
// Both labels are spelled out in full so the "off" state does not produce a name with a leading space.
yield return new ExecuteCommandToggleInsertOrderingPrefixForLoadMetadata(lmd)
{
    OverrideCommandName = lmd.OrderInsertsByPrimaryKey
        ? "Don't Optimise Inserts for Sequential Keys"
        : "Optimise Inserts for Sequential Keys",
    SuggestedCategory = "Advanced"
};

yield return new ExecuteCommandSetGlobalDleIgnorePattern(_activator) { SuggestedCategory = Advanced };
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright (c) The University of Dundee 2024-2024
// This file is part of the Research Data Management Platform (RDMP).
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along with RDMP. If not, see <https://www.gnu.org/licenses/>.

using Rdmp.Core.Curation.Data;
using Rdmp.Core.Curation.Data.DataLoad;

namespace Rdmp.Core.CommandExecution.AtomicCommands;

/// <summary>
/// Flips the <see cref="LoadMetadata.OrderInsertsByPrimaryKey"/> flag on a <see cref="LoadMetadata"/> and saves the change to the database
/// </summary>
public class ExecuteCommandToggleInsertOrderingPrefixForLoadMetadata : BasicCommandExecution
{
    // Only ever assigned in the constructor
    private readonly LoadMetadata _loadMetadata;

    /// <summary>
    /// Creates a command that will toggle insert ordering on <paramref name="loadMetadata"/> when executed
    /// </summary>
    /// <param name="loadMetadata">The LoadMetadata whose <see cref="LoadMetadata.OrderInsertsByPrimaryKey"/> flag will be flipped</param>
    public ExecuteCommandToggleInsertOrderingPrefixForLoadMetadata([DemandsInitialization("The LoadMetadata to update")] LoadMetadata loadMetadata)
    {
        _loadMetadata = loadMetadata;
    }

    /// <summary>
    /// Inverts the current value of <see cref="LoadMetadata.OrderInsertsByPrimaryKey"/> and persists it with SaveToDatabase
    /// </summary>
    public override void Execute()
    {
        base.Execute();

        _loadMetadata.OrderInsertsByPrimaryKey = !_loadMetadata.OrderInsertsByPrimaryKey;
        _loadMetadata.SaveToDatabase();
    }
}
6 changes: 5 additions & 1 deletion Rdmp.Core/Curation/Data/Aggregation/AggregateFilter.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) The University of Dundee 2018-2019
// Copyright (c) The University of Dundee 2018-2024
// This file is part of the Research Data Management Platform (RDMP).
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
Expand Down Expand Up @@ -35,6 +35,7 @@ public class AggregateFilter : ConcreteFilter, IDisableable
private int? _clonedFromExtractionFilterID;
private int? _associatedColumnInfoID;
private bool _isDisabled;
private int _order;

/// <inheritdoc/>
public override int? ClonedFromExtractionFilter_ID
Expand Down Expand Up @@ -90,6 +91,8 @@ public IEnumerable<AggregateFilterParameter> AggregateFilterParameters
? Repository.GetObjectByID<AggregateFilterContainer>(FilterContainer_ID.Value)
: null;

/// <inheritdoc/>
public override int Order
{
    get => _order;
    set => SetField(ref _order, value);
}

#endregion

public AggregateFilter()
Expand Down Expand Up @@ -121,6 +124,7 @@ internal AggregateFilter(ICatalogueRepository repository, DbDataReader r) : base
Name = r["Name"] as string;
IsMandatory = (bool)r["IsMandatory"];
ClonedFromExtractionFilter_ID = ObjectToNullableInt(r["ClonedFromExtractionFilter_ID"]);
Order = int.Parse(r["Order"].ToString());

var associatedColumnInfo_ID = r["AssociatedColumnInfo_ID"];
if (associatedColumnInfo_ID != DBNull.Value)
Expand Down
5 changes: 3 additions & 2 deletions Rdmp.Core/Curation/Data/ConcreteFilter.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) The University of Dundee 2018-2019
// Copyright (c) The University of Dundee 2018-2024
// This file is part of the Research Data Management Platform (RDMP).
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
Expand All @@ -25,7 +25,7 @@ namespace Rdmp.Core.Curation.Data;
/// <para>ConcreteFilter is used to provide UI editing of an IFilter without having to add persistence / DatabaseEntity logic to IFilter (which would break
/// SpontaneouslyInventedFilters)</para>
/// </summary>
public abstract class ConcreteFilter : DatabaseEntity, IFilter, ICheckable
public abstract class ConcreteFilter : DatabaseEntity, IFilter, ICheckable, IOrderable
{
/// <inheritdoc/>
protected ConcreteFilter(IRepository repository, DbDataReader r) : base(repository, r)
Expand Down Expand Up @@ -100,6 +100,7 @@ public bool IsMandatory
/// <inheritdoc cref="FilterContainer_ID"/>
[NoMappingToDatabase]
public abstract IContainer FilterContainer { get; }
/// <inheritdoc/>
public abstract int Order { get; set; }

#endregion

Expand Down
6 changes: 6 additions & 0 deletions Rdmp.Core/Curation/Data/DataLoad/ILoadMetadata.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ public interface ILoadMetadata : INamed, ILoggedActivityRootObject
/// </summary>
bool AllowReservedPrefix { get; }


/// <summary>
/// Optional - Allows Data Load inserts to be ordered by the primary key, for efficiency when the primary key is clustered.
/// Requires access to sys.indexes, INFORMATION_SCHEMA.TABLE_CONSTRAINTS and INFORMATION_SCHEMA.KEY_COLUMN_USAGE.
/// </summary>
bool OrderInsertsByPrimaryKey { get; }

/// <summary>
/// List of all the user configured steps in a data load. For example you could have 2 ProcessTasks, one that downloads files from an FTP server and one that loads RAW.
/// </summary>
Expand Down
8 changes: 8 additions & 0 deletions Rdmp.Core/Curation/Data/DataLoad/LoadMetadata.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ public class LoadMetadata : DatabaseEntity, ILoadMetadata, IHasDependencies, IHa
private string _folder;
private DateTime? _lastLoadTime;
private bool _allowReservedPrefix;
private bool _orderInsertsByPK;

public string DefaultForLoadingPath = Path.Combine("Data", "ForLoading");
public string DefaultForArchivingPath = Path.Combine("Data", "ForArchiving");
Expand Down Expand Up @@ -91,6 +92,13 @@ public bool AllowReservedPrefix
set => SetField(ref _allowReservedPrefix, value);
}

/// <inheritdoc/>
public bool OrderInsertsByPrimaryKey { get => _orderInsertsByPK; set => SetField(ref _orderInsertsByPK, value); }

/// <inheritdoc/>
public string LocationOfForLoadingDirectory
{
Expand Down
6 changes: 5 additions & 1 deletion Rdmp.Core/Curation/Data/ExtractionFilter.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) The University of Dundee 2018-2019
// Copyright (c) The University of Dundee 2018-2024
// This file is part of the Research Data Management Platform (RDMP).
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
Expand Down Expand Up @@ -40,6 +40,7 @@ public class ExtractionFilter : ConcreteFilter, IHasDependencies, IInjectKnown<E

private int _extractionInformationID;
private Lazy<ExtractionFilterParameterSet[]> _knownExtractionFilterParameterSets;
private int _order;

/// <summary>
/// The column in the <see cref="Catalogue"/> which is best/most associated with this filter. A filter can query any column in any of the table(s) under
Expand Down Expand Up @@ -133,6 +134,8 @@ internal ExtractionFilter(ICatalogueRepository repository, DbDataReader r)
Description = r["Description"] as string;
Name = r["Name"] as string;
IsMandatory = (bool)r["IsMandatory"];
Order = int.Parse(r["Order"].ToString());


ClearAllInjections();
}
Expand All @@ -154,6 +157,7 @@ public override int? ClonedFromExtractionFilter_ID
set => throw new NotSupportedException(
"ClonedFromExtractionFilter_ID is only supported on lower level filters e.g. DeployedExtractionFilter and AggregateFilter");
}
/// <inheritdoc/>
public override int Order
{
    get => _order;
    set => SetField(ref _order, value);
}

/// <inheritdoc/>
public IHasDependencies[] GetObjectsThisDependsOn()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) The University of Dundee 2018-2019
// Copyright (c) The University of Dundee 2018-2024
// This file is part of the Research Data Management Platform (RDMP).
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
Expand All @@ -22,6 +22,7 @@ public class SpontaneouslyInventedFilter : ConcreteFilter
{
private readonly MemoryCatalogueRepository _repo;
private readonly ISqlParameter[] _filterParametersIfAny;
private int _order =0;

/// <summary>
/// Creates a new temporary (unsaveable) filter in the given memory <paramref name="repo"/>
Expand Down Expand Up @@ -68,6 +69,8 @@ public SpontaneouslyInventedFilter(MemoryCatalogueRepository repo, IFilter copyF
? _repo.GetObjectByID<IContainer>(FilterContainer_ID.Value)
: null;

/// <inheritdoc/>
public override int Order
{
    get => _order;
    set => SetField(ref _order, value);
}

public override ColumnInfo GetColumnInfoIfExists() => null;

public override IFilterFactory GetFilterFactory() => null;
Expand Down
6 changes: 5 additions & 1 deletion Rdmp.Core/DataExport/Data/DeployedExtractionFilter.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) The University of Dundee 2018-2019
// Copyright (c) The University of Dundee 2018-2024
// This file is part of the Research Data Management Platform (RDMP).
// RDMP is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
// RDMP is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
Expand Down Expand Up @@ -36,6 +36,7 @@ public class DeployedExtractionFilter : ConcreteFilter

private int? _clonedFromExtractionFilterID;
private int? _filterContainerID;
private int _order;

/// <inheritdoc/>
public override int? ClonedFromExtractionFilter_ID
Expand Down Expand Up @@ -69,6 +70,8 @@ public override int? FilterContainer_ID
? Repository.GetObjectByID<FilterContainer>(FilterContainer_ID.Value)
: null;

/// <inheritdoc/>
public override int Order
{
    get => _order;
    set => SetField(ref _order, value);
}

#endregion

/// <inheritdoc/>
Expand Down Expand Up @@ -138,6 +141,7 @@ internal DeployedExtractionFilter(IDataExportRepository repository, DbDataReader
FilterContainer_ID = null;

ClonedFromExtractionFilter_ID = ObjectToNullableInt(r["ClonedFromExtractionFilter_ID"]);
Order = int.Parse(r["Order"].ToString());
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@

using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;
using System.Text;
using FAnsi;
using FAnsi.Connections;
using FAnsi.Discovery;
using FAnsi.Discovery.QuerySyntax;
using MongoDB.Driver;
using NPOI.SS.Formula.Functions;
using Rdmp.Core.DataFlowPipeline;
using Rdmp.Core.DataLoad.Engine.Job;
using Rdmp.Core.DataLoad.Triggers;
Expand Down Expand Up @@ -100,6 +103,37 @@ CrossDatabaseMergeCommandTo..ToTable.Age is null
sbInsert.AppendLine(
$"{columnsToMigrate.DestinationTable.GetFullyQualifiedName()}.{syntax.EnsureWrapped(columnsToMigrate.PrimaryKeys.First().GetRuntimeName())} IS NULL");

// Optionally append an ORDER BY over the destination table's primary key columns so the rows are selected for insert in key order
if (job.LoadMetadata.OrderInsertsByPrimaryKey && columnsToMigrate.PrimaryKeys.Any())
{
    // The table name is embedded in a SQL string literal, so any single quotes in it must be doubled
    var destinationTableName = columnsToMigrate.DestinationTable.GetRuntimeName().Replace("'", "''");

    // Looks up the primary key columns (in key order) and, where sys.index_columns has a matching row, their sort direction.
    // NOTE(review): sys.indexes / COL_NAME look SQL Server specific - confirm this option is only enabled for SQL Server destinations
    var orderSQL = $@"
SELECT KU.ORDINAL_POSITION AS ORDINAL_POSITION
, COLUMN_NAME, TC.CONSTRAINT_NAME as name, is_descending_key
FROM
INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS TC
INNER JOIN
INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS KU ON TC.CONSTRAINT_TYPE = 'PRIMARY KEY'
AND TC.CONSTRAINT_NAME = KU.CONSTRAINT_NAME
AND KU.TABLE_NAME = '{destinationTableName}'
LEFT JOIN(select COL_NAME(ic.object_id,ic.column_id) as cn, i.name, is_descending_key FROM sys.indexes i
INNER JOIN sys.data_spaces ds ON i.data_space_id = ds.data_space_id
INNER JOIN sys.index_columns ic ON i.object_id = ic.object_id AND i.index_id = ic.index_id
where is_primary_key=1) SY ON SY.cn = KU.COLUMN_NAME AND SY.name = KU.CONSTRAINT_NAME
ORDER BY ORDINAL_POSITION asc
";
    using var dt = new DataTable();
    dt.BeginLoadData();

    using (var orderCmd = server.GetCommand(orderSQL, _managedConnection))
    {
        orderCmd.CommandTimeout = Timeout;
        using var da = server.GetDataAdapter(orderCmd);
        da.Fill(dt);
    }

    // Pair the BeginLoadData above so the table is returned to its normal state once filled
    dt.EndLoadData();

    // is_descending_key comes from the LEFT JOIN and can therefore be DBNull; anything other than true is treated as ascending.
    // Column names are wrapped so that names containing spaces or reserved words still produce valid SQL.
    var orderColumns = dt.AsEnumerable()
        .Select(row => $"{syntax.EnsureWrapped(row["COLUMN_NAME"].ToString())} {(row["is_descending_key"] is true ? "DESC" : "ASC")}")
        .ToList();

    // An empty list would yield the invalid fragment 'ORDER BY ', so only append when the lookup found key columns
    if (orderColumns.Count > 0)
        sbInsert.Append($"ORDER BY {string.Join(", ", orderColumns)}");
}

//right at the end of the SELECT
if (columnsToMigrate.DestinationTable.Database.Server.DatabaseType == DatabaseType.MySql)
sbInsert.Append(" FOR UPDATE");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ CREATE TABLE [dbo].[AggregateFilter](
[AssociatedColumnInfo_ID] [int] NULL,
[ID] [int] IDENTITY(1,1) NOT NULL,
[SoftwareVersion] [nvarchar](50) NOT NULL,
[Order] [int] NOT NULL DEFAULT 0
CONSTRAINT [PK_AggregateFilter] PRIMARY KEY CLUSTERED
(
[ID] ASC
Expand Down Expand Up @@ -464,6 +465,7 @@ CREATE TABLE [dbo].[ExtractionFilter](
[Name] [varchar](100) NOT NULL,
[IsMandatory] [bit] NOT NULL,
[SoftwareVersion] [nvarchar](50) NOT NULL,
[Order] [int] NOT NULL DEFAULT 0
CONSTRAINT [PK_ExtractionFilter] PRIMARY KEY CLUSTERED
(
[ID] ASC
Expand Down Expand Up @@ -599,6 +601,7 @@ CREATE TABLE [dbo].[LoadMetadata](
[CacheArchiveType] [int] NOT NULL,
[SoftwareVersion] [nvarchar](50) NOT NULL,
[AllowReservedPrefix] [bit] NOT NULL default 0,
[OrderInsertsByPrimaryKey] [bit] NOT NULL default 0,
CONSTRAINT [PK_LoadMetadata] PRIMARY KEY CLUSTERED
(
[ID] ASC
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
--Version: 8.4.0
--Description: Add Order to Aggregate Filters and Extraction Filters

-- Each column is only added when it is missing, so the script can be re-run safely.
-- The existence checks are schema qualified so they look at the same [dbo] tables that the ALTER statements modify.
if not exists (select 1 from sys.columns where name = 'Order' and object_id = OBJECT_ID('[dbo].[AggregateFilter]'))
BEGIN
    ALTER TABLE [dbo].[AggregateFilter]
    ADD [Order] [int] NOT NULL DEFAULT 0 WITH VALUES
END
if not exists (select 1 from sys.columns where name = 'Order' and object_id = OBJECT_ID('[dbo].[ExtractionFilter]'))
BEGIN
    ALTER TABLE [dbo].[ExtractionFilter]
    ADD [Order] [int] NOT NULL DEFAULT 0 WITH VALUES
END
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
--Version: 8.4.0
--Description: Add ability to allow data loads to order inserts based on primary key

-- The column is only added when it is missing, so the script can be re-run safely.
-- The existence check is schema qualified so it looks at the same [dbo] table that the ALTER statement modifies.
if not exists (select 1 from sys.columns where name = 'OrderInsertsByPrimaryKey' and object_id = OBJECT_ID('[dbo].[LoadMetadata]'))
BEGIN
    ALTER TABLE [dbo].[LoadMetadata]
    ADD [OrderInsertsByPrimaryKey] [bit] NOT NULL DEFAULT 0 WITH VALUES
END
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
--Version: 8.4.0
--Description: Add Order to Deployed Extraction Filters

-- The column is only added when it is missing, so the script can be re-run safely.
-- The existence check is schema qualified so it looks at the same [dbo] table that the ALTER statement modifies.
if not exists (select 1 from sys.columns where name = 'Order' and object_id = OBJECT_ID('[dbo].[DeployedExtractionFilter]'))
BEGIN
    ALTER TABLE [dbo].[DeployedExtractionFilter]
    ADD [Order] [int] NOT NULL DEFAULT 0 WITH VALUES
END
6 changes: 6 additions & 0 deletions Rdmp.Core/Rdmp.Core.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@
<None Remove="Databases\CatalogueDatabase\up\084_AddLoadDirectorySplit.sql" />
<None Remove="Databases\CatalogueDatabase\up\085_AddTicketingReleaseStatuses.sql" />
<None Remove="Databases\CatalogueDatabase\up\086_AddDataLoadPrefixOverride.sql" />
<None Remove="Databases\CatalogueDatabase\up\087_AddDataLoadInsertOrder.sql" />
<None Remove="Databases\CatalogueDatabase\up\087_AddAggregateFilterOrdering.sql" />
<None Remove="Databases\DataExportDatabase\runAfterCreateDatabase\CreateDataExportManager.sql" />
<None Remove="Databases\DataExportDatabase\up\001_AddUsernamePasswordFieldsToExternalCohortTable.sql" />
<None Remove="Databases\DataExportDatabase\up\002_FixServerAndDatabaseNameOnExternalCohort.sql" />
Expand Down Expand Up @@ -157,6 +159,7 @@
<None Remove="Databases\DataExportDatabase\up\024_AddExtractionProgressTable.sql" />
<None Remove="Databases\DataExportDatabase\up\025_AddExtractionProgressRetry.sql" />
<None Remove="Databases\DataExportDatabase\up\025_AddFolders.sql" />
<None Remove="Databases\DataExportDatabase\up\026_AddFilterOrder.sql" />
<None Remove="Databases\DataQualityEngineDatabase\runAfterCreateDatabase\CreateTables.sql" />
<None Remove="Databases\DataQualityEngineDatabase\up\001_AnnotationsAndDiagram.sql" />
<None Remove="Databases\DataQualityEngineDatabase\up\002_AddPivotCategoryLogic.sql" />
Expand Down Expand Up @@ -255,6 +258,8 @@
<EmbeddedResource Include="Databases\CatalogueDatabase\up\084_AddLoadDirectorySplit.sql" />
<EmbeddedResource Include="Databases\CatalogueDatabase\up\085_AddTicketingReleaseStatuses.sql" />
<EmbeddedResource Include="Databases\CatalogueDatabase\up\086_AddDataLoadPrefixOverride.sql" />
<EmbeddedResource Include="Databases\CatalogueDatabase\up\087_AddDataLoadInsertOrder.sql"/>
<EmbeddedResource Include="Databases\CatalogueDatabase\up\087_AddAggregateFilterOrdering.sql" />
<EmbeddedResource Include="Databases\CatalogueDatabase\up\079_AddProcessTaskConfiguration.sql" />
<EmbeddedResource Include="Databases\CatalogueDatabase\up\082_AddCohortVersioning.sql" />
<EmbeddedResource Include="Databases\CatalogueDatabase\up\083_AddGroupBy.sql" />
Expand Down Expand Up @@ -297,6 +302,7 @@
<EmbeddedResource Include="Databases\DataExportDatabase\up\020_AddReleaseLogIDColumn.sql" />
<EmbeddedResource Include="Databases\DataExportDatabase\up\022_RowVer.sql" />
<EmbeddedResource Include="Databases\DataExportDatabase\up\025_AddFolders.sql" />
<EmbeddedResource Include="Databases\DataExportDatabase\up\026_AddFilterOrder.sql" />
<EmbeddedResource Include="Databases\DataQualityEngineDatabase\runAfterCreateDatabase\CreateTables.sql" />
<EmbeddedResource Include="Databases\DataQualityEngineDatabase\up\001_AnnotationsAndDiagram.sql" />
<EmbeddedResource Include="Databases\DataQualityEngineDatabase\up\002_AddPivotCategoryLogic.sql" />
Expand Down
Loading