Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fourth column for graph name #1337

Merged
merged 19 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/engine/HasPredicateScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@
case ScanType::FULL_SCAN:
return "HasPredicateScan full scan";
case ScanType::SUBQUERY_S:
return "HasPredicateScan with a subquery on " + subject_.toRdfLiteral();
return "HasPredicateScan with subquery";

Check warning on line 118 in src/engine/HasPredicateScan.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/HasPredicateScan.cpp#L118

Added line #L118 was not covered by tests
default:
return "HasPredicateScan";
}
Expand Down
8 changes: 7 additions & 1 deletion src/engine/IndexScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,13 @@
// _____________________________________________________________________________
vector<ColumnIndex> IndexScan::resultSortedOn() const {
auto resAsView = ad_utility::integerRange(ColumnIndex{numVariables_});
return std::vector<ColumnIndex>{resAsView.begin(), resAsView.end()};
std::vector<ColumnIndex> result{resAsView.begin(), resAsView.end()};
for (size_t i = 0; i < additionalColumns_.size(); ++i) {
if (additionalColumns_.at(i) == ADDITIONAL_COLUMN_GRAPH_ID) {
result.push_back(numVariables_ + i);

Check warning on line 101 in src/engine/IndexScan.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/IndexScan.cpp#L101

Added line #L101 was not covered by tests
}
}
return result;
}

// _____________________________________________________________________________
Expand Down
6 changes: 5 additions & 1 deletion src/engine/idTable/IdTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "util/Iterators.h"
#include "util/LambdaHelpers.h"
#include "util/ResetWhenMoved.h"
#include "util/SourceLocation.h"
#include "util/UninitializedAllocator.h"
#include "util/Views.h"

Expand Down Expand Up @@ -299,7 +300,10 @@ class IdTable {
// TODO<joka921, C++23> Use the multidimensional subscript operator.
// TODO<joka921, C++23> Use explicit object parameters ("deducing this").
T& operator()(size_t row, size_t column) requires(!isView) {
AD_EXPENSIVE_CHECK(column < data().size());
AD_EXPENSIVE_CHECK(column < data().size(), [&]() {
return absl::StrCat(row, " , ", column, ", ", data().size(), " ",
numColumns(), ", ", numStaticColumns);
});
AD_EXPENSIVE_CHECK(row < data().at(column).size());
return data()[column][row];
}
Expand Down
1 change: 1 addition & 0 deletions src/engine/sparqlExpressions/RelationalExpressions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ RelationalExpression<Comp>::getLanguageFilterExpression() const {
return getLangFilterData(child2, child1);
}
}

namespace {
// _____________________________________________________________________________
SparqlExpression::Estimates getEstimatesForFilterExpressionImpl(
Expand Down
12 changes: 10 additions & 2 deletions src/global/Constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ static const std::string INTERNAL_TEXT_MATCH_PREDICATE =
static const std::string HAS_PREDICATE_PREDICATE =
makeInternalIri("has-predicate");
static const std::string HAS_PATTERN_PREDICATE = makeInternalIri("has-pattern");
static const std::string DEFAULT_GRAPH_IRI = makeInternalIri("default-graph");
static const std::string INTERNAL_GRAPH_IRI = makeInternalIri("internal-graph");

static constexpr std::pair<std::string_view, std::string_view> GEOF_PREFIX = {
"geof:", "http://www.opengis.net/def/function/geosparql/"};
static constexpr std::pair<std::string_view, std::string_view> MATH_PREFIX = {
Expand Down Expand Up @@ -181,11 +184,16 @@ static constexpr int DEFAULT_MAX_NUM_COLUMNS_STATIC_ID_TABLE = 5;
// `CancellationHandle::throwIfCancelled` is called regularly.
constexpr std::chrono::milliseconds DESIRED_CANCELLATION_CHECK_INTERVAL{50};

// In all permutations, the graph ID of the triple is stored as the fourth
// entry. During the index building it is important that this is the first
// column after the "actual" triple.
constexpr size_t ADDITIONAL_COLUMN_GRAPH_ID = 3;

// In the PSO and POS permutations the patterns of the subject and object are
// stored at the following indices. Note that the col0 (the P) is not part of
// the result, so the column order for PSO is S O PatternS PatternO.
constexpr size_t ADDITIONAL_COLUMN_INDEX_SUBJECT_PATTERN = 3;
constexpr size_t ADDITIONAL_COLUMN_INDEX_OBJECT_PATTERN = 4;
constexpr size_t ADDITIONAL_COLUMN_INDEX_SUBJECT_PATTERN = 4;
constexpr size_t ADDITIONAL_COLUMN_INDEX_OBJECT_PATTERN = 5;

#ifdef _PARALLEL_SORT
static constexpr bool USE_PARALLEL_SORT = true;
Expand Down
4 changes: 3 additions & 1 deletion src/global/SpecialIds.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ namespace qlever {
static const inline ad_utility::HashMap<std::string, Id> specialIds = []() {
ad_utility::HashMap<std::string, Id> result{
{HAS_PREDICATE_PREDICATE, Id::fromBits(1)},
{HAS_PATTERN_PREDICATE, Id::fromBits(2)}};
{HAS_PATTERN_PREDICATE, Id::fromBits(2)},
{DEFAULT_GRAPH_IRI, Id::fromBits(3)},
{INTERNAL_GRAPH_IRI, Id::fromBits(4)}};

// Perform the following checks: All the special IDs are unique, all of them
// have the `Undefined` datatype, but none of them is equal to the "actual"
Expand Down
3 changes: 2 additions & 1 deletion src/index/CompressedRelation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -992,7 +992,8 @@ auto CompressedRelationWriter::createPermutationPair(
IdTableStatic<0> relation{numColumns, alloc};
size_t numBlocksCurrentRel = 0;
auto compare = [](const auto& a, const auto& b) {
return std::tie(a[c1Idx], a[c2Idx]) < std::tie(b[c1Idx], b[c2Idx]);
return std::tie(a[c1Idx], a[c2Idx], a[ADDITIONAL_COLUMN_GRAPH_ID]) <
std::tie(b[c1Idx], b[c2Idx], b[ADDITIONAL_COLUMN_GRAPH_ID]);
};
// TODO<joka921> Use `CALL_FIXED_SIZE`.
ad_utility::CompressedExternalIdTableSorter<decltype(compare), 0>
Expand Down
2 changes: 2 additions & 0 deletions src/index/ConstantsIndexBuilding.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,5 @@ inline size_t BUFFER_SIZE_PARTIAL_TO_GLOBAL_ID_MAPPINGS = 10'000;
// infeasible. 250K seems to be a reasonable tradeoff here.
constexpr ad_utility::MemorySize
UNCOMPRESSED_BLOCKSIZE_COMPRESSED_METADATA_PER_COLUMN = 250_kB;

static constexpr size_t NumColumnsIndexBuilding = 4;
43 changes: 22 additions & 21 deletions src/index/IndexBuilderTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,9 @@ struct TripleComponentWithIndex {
};

using TripleComponentOrId = std::variant<PossiblyExternalizedIriOrLiteral, Id>;
// A triple that also knows for each entry, whether this entry should be
// part of the external vocabulary.
using Triple = std::array<TripleComponentOrId, 3>;

// Convert a triple of `std::string` to a triple of `TripleComponents`. All
// three entries will have `isExternal()==false` and an uninitialized ID.
inline Triple makeTriple(std::array<std::string, 3>&& t) {
using T = PossiblyExternalizedIriOrLiteral;
return {T{t[0]}, T{t[1]}, T{t[2]}};
}
// A triple + GraphId that also knows for each entry, whether this entry should
// be part of the external vocabulary.
using Triple = std::array<TripleComponentOrId, NumColumnsIndexBuilding>;

/// The index of a word and the corresponding `SplitVal`.
struct LocalVocabIndexAndSplitVal {
Expand Down Expand Up @@ -177,8 +170,9 @@ struct alignas(256) ItemMapManager {
}

/// call getId for each of the Triple elements.
std::array<Id, 3> getId(const Triple& t) {
return {getId(t[0]), getId(t[1]), getId(t[2])};
std::array<Id, NumColumnsIndexBuilding> getId(const Triple& t) {
return std::apply(
[this](const auto&... els) { return std::array{getId(els)...}; }, t);
}
ItemMapAndBuffer map_;
uint64_t minId_ = 0;
Expand Down Expand Up @@ -242,7 +236,8 @@ auto getIdMapLambdas(
itemArray[j]->getId(TripleComponent{
ad_utility::triple_component::Iri::fromIriref(LANGUAGE_PREDICATE)});
}
using OptionalIds = std::array<std::optional<std::array<Id, 3>>, 3>;
using OptionalIds =
std::array<std::optional<std::array<Id, NumColumnsIndexBuilding>>, 3>;

/* given an index idx, returns a lambda that
* - Takes a triple and a language tag
Expand All @@ -253,7 +248,9 @@ auto getIdMapLambdas(
* - All Ids are assigned according to itemArray[idx]
*/
const auto itemMapLamdaCreator = [&itemArray, indexPtr](const size_t idx) {
return [&map = *itemArray[idx], indexPtr](ad_utility::Rvalue auto&& tr) {
auto internalGraphId = qlever::specialIds.at(INTERNAL_GRAPH_IRI);
return [&map = *itemArray[idx], indexPtr,
internalGraphId](ad_utility::Rvalue auto&& tr) {
auto lt = indexPtr->tripleToInternalRepresentation(AD_FWD(tr));
OptionalIds res;
// get Ids for the actual triple and store them in the result.
Expand All @@ -272,16 +269,20 @@ auto getIdMapLambdas(
auto& spoIds = *res[0]; // ids of original triple
// TODO replace the std::array by an explicit IdTriple class,
// then the emplace calls don't need the explicit type.
using Arr = std::array<Id, NumColumnsIndexBuilding>;
static_assert(NumColumnsIndexBuilding == 4,
" The following lines probably have to be changed when "
"the number of payload columns changes");
// extra triple <subject> @language@<predicate> <object>
// The additional triples all have the graph ID of the internal graph.
res[1].emplace(
std::array<Id, 3>{spoIds[0], langTaggedPredId, spoIds[2]});
Arr{spoIds[0], langTaggedPredId, spoIds[2], internalGraphId});
// extra triple <object> ql:language-tag <@language>
res[2].emplace(std::array<Id, 3>{
spoIds[2],
map.getId(
TripleComponent{ad_utility::triple_component::Iri::fromIriref(
LANGUAGE_PREDICATE)}),
langTagId});
res[2].emplace(Arr{spoIds[2],
map.getId(TripleComponent{
ad_utility::triple_component::Iri::fromIriref(
LANGUAGE_PREDICATE)}),
langTagId, internalGraphId});
}
return res;
};
Expand Down
2 changes: 1 addition & 1 deletion src/index/IndexFormatVersion.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,5 @@ struct IndexFormatVersion {
// The actual index version. Change it once the binary format of the index
// changes.
inline const IndexFormatVersion& indexFormatVersion{
1405, DateYearOrDuration{Date{2024, 7, 22}}};
1337, DateYearOrDuration{Date{2024, 8, 20}}};
} // namespace qlever
Loading
Loading