Merge branch 'master' into add-graph-column
joka921 committed Aug 16, 2024
2 parents 14d9414 + 4a18d3b commit 55d53a3
Showing 83 changed files with 1,277 additions and 498 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/check_index_version.yml
@@ -27,11 +27,11 @@ jobs:


    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          submodules: 'recursive'
          path: 'pr'
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          submodules: 'recursive'
          path: 'master'
4 changes: 2 additions & 2 deletions .github/workflows/code-coverage.yml
@@ -37,7 +37,7 @@ jobs:

    runs-on: ubuntu-22.04
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          submodules: "recursive"

@@ -83,7 +83,7 @@ jobs:
        # case we are on the master branch and have access to the Codecov token.
      - name: "Submit coverage data to codecov.io"
        if: github.event_name != 'pull_request'
-        uses: codecov/codecov-action@v3
+        uses: codecov/codecov-action@v4
        with:
          file: ${{github.workspace}}/build/test/coverage.lcov
          # Note: technically, a `token` is not required for codecov.io when
6 changes: 3 additions & 3 deletions .github/workflows/docker-publish.yml
@@ -37,7 +37,7 @@ jobs:
        id: pr
        run: echo "pr_num=$(git log --format=%s -n 1 | sed -nr 's/.*\(\#([0-9]+)\)/\1/p')" >> $GITHUB_OUTPUT
      - name: Build X-86-64
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
        with:
          context: .
          # Docker multiplatform images require an entry in their manifests. If this
@@ -69,7 +69,7 @@ jobs:
        # be built on pull requests which allows for debugging without changing
        # the master branch.
        if: false
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
        with:
          context: .
          platforms: linux/arm64
@@ -88,7 +88,7 @@

      - name: Build and push
        if: github.event_name != 'pull_request'
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
        with:
          context: .
          platforms: linux/amd64,linux/arm64
2 changes: 1 addition & 1 deletion .github/workflows/format-check.yml
@@ -16,7 +16,7 @@ jobs:
    runs-on: ubuntu-22.04

    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Install dependencies
        run: |
          # The following line currently seems to be necessary to work around a bug in the installation.
15 changes: 15 additions & 0 deletions .github/workflows/install-dependencies-ubuntu/action.yml
@@ -10,6 +10,21 @@ inputs:
runs:
  using: "composite"
  steps:
+    - name: Free Disk Space (Ubuntu)
+      uses: jlumbroso/free-disk-space@main
+      with:
+        # If set to "true", this frees about 6 GB, but it might remove
+        # tools that are actually needed.
+        tool-cache: false
+
+        # All of these default to true; set to "false" if necessary
+        # for your workflow.
+        android: true
+        dotnet: true
+        haskell: true
+        large-packages: true
+        docker-images: true
+        swap-storage: true
    - name: Install basic compiler
      run: |
        sudo apt-get update
2 changes: 1 addition & 1 deletion .github/workflows/macos.yml
@@ -23,7 +23,7 @@ jobs:
        build-type: [Release]
    runs-on: macos-12
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          submodules: 'recursive'

2 changes: 1 addition & 1 deletion .github/workflows/native-build-conan.yml
@@ -26,7 +26,7 @@ jobs:


    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          submodules: 'recursive'

2 changes: 1 addition & 1 deletion .github/workflows/native-build.yml
@@ -61,7 +61,7 @@ jobs:


    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          submodules: 'recursive'
      - name: Install dependencies
2 changes: 1 addition & 1 deletion .github/workflows/sonarcloud.yml
@@ -12,7 +12,7 @@ jobs:
      fail-fast: false
    runs-on: ubuntu-22.04
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          submodules: 'recursive'

6 changes: 3 additions & 3 deletions .github/workflows/upload-coverage.yml
@@ -28,7 +28,7 @@ jobs:
      github.event.workflow_run.conclusion == 'success'
    steps:
      - name: 'Download artifact'
-        uses: actions/github-script@v6
+        uses: actions/github-script@v7
        # The following script is taken from the link stated at the
        # beginning of this file. It manually downloads an artifact
        # from another workflow.
@@ -64,7 +64,7 @@ jobs:
      # be overwritten. We then move all the files back into the working
      # directory such that Codecov will pick them up properly.
      - name: "Checkout"
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          repository: ${{env.original_github_repository}}
          submodules: "recursive"
@@ -73,7 +73,7 @@
      - name: "Move qlever sources up"
        run: mv qlever-source/* .
      - name: "Upload coverage report"
-        uses: codecov/codecov-action@v3
+        uses: codecov/codecov-action@v4
        with:
          file: coverage.lcov
          # Note: technically, a `token` is not required for codecov.io when
4 changes: 2 additions & 2 deletions .github/workflows/upload-sonarcloud.yml
@@ -40,7 +40,7 @@ jobs:
      - name: Print concurrency key
        run: echo "${{ github.workflow }} @ ${{ github.event.workflow_run.head_repository.full_name || github.ref}} @ ${{ github.event.workflow_run.head_branch || github.event.workflow_run.pull_requests[0].url || github.head_ref || github.ref }}"
      - name: 'Download artifact'
-        uses: actions/github-script@v6
+        uses: actions/github-script@v7
        if: github.event.workflow_run.event == 'pull_request'
        # The following script is taken from the link stated at the
        # beginning of this file. It manually downloads an artifact
@@ -91,7 +91,7 @@ jobs:
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: "Checkout"
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          repository: ${{ github.event.workflow_run.head_repository.full_name }}
          ref: ${{ github.event.workflow_run.head_branch }}
2 changes: 1 addition & 1 deletion src/engine/Bind.cpp
@@ -119,7 +119,7 @@ void Bind::computeExpressionBind(
  sparqlExpression::EvaluationContext evaluationContext(
      *getExecutionContext(), _subtree->getVariableColumns(),
      inputResultTable.idTable(), getExecutionContext()->getAllocator(),
-      inputResultTable.localVocab(), cancellationHandle_);
+      inputResultTable.localVocab(), cancellationHandle_, deadline_);

  sparqlExpression::ExpressionResult expressionResult =
      expression->evaluate(&evaluationContext);
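The `deadline_` argument added here (and in Filter and GroupBy below) presumably lets expression evaluation stop once a query timeout is reached. A hypothetical sketch of such a deadline check; the names and the exception type are illustrative, not QLever's actual EvaluationContext API:

#include <chrono>
#include <stdexcept>

using Deadline = std::chrono::steady_clock::time_point;

// Throw once the deadline has passed; cheap enough to call periodically
// from long-running evaluation loops.
void checkDeadline(Deadline deadline) {
  if (std::chrono::steady_clock::now() >= deadline) {
    throw std::runtime_error("Query timeout");
  }
}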
15 changes: 14 additions & 1 deletion src/engine/CartesianProductJoin.cpp
@@ -154,10 +154,19 @@ ProtoResult CartesianProductJoin::computeResult(
      child.setLimit(limitIfPresent.value());
    }
    subResults.push_back(child.getResult());
+
+    const auto& table = subResults.back()->idTable();
    // Early stopping: If one of the results is empty, we can stop early.
-    if (subResults.back()->idTable().size() == 0) {
+    if (table.empty()) {
      break;
    }
+
+    // If one of the children is the neutral element (because of a triple with
+    // zero variables), we can simply ignore it here.
+    if (table.numRows() == 1 && table.numColumns() == 0) {
+      subResults.pop_back();
+      continue;
+    }
    // Example for the following calculation: If we have a LIMIT of 1000 and
    // the first child already has a result of size 100, then the second child
    // needs to evaluate only its first 10 results. The +1 is because integer
@@ -169,6 +178,10 @@
    }
  }

+  // TODO<joka921> Find a solution to cheaply handle the case, that only a
+  // single result is left. This can probably be done by using the
+  // `ProtoResult`.
+
  auto sizesView = std::views::transform(
      subResults, [](const auto& child) { return child->idTable().size(); });
  auto totalResultSize = std::accumulate(sizesView.begin(), sizesView.end(),
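Aside on the first hunk above: the explanatory comment is cut off by the collapsed diff lines, but the arithmetic it describes is simple. A hypothetical standalone sketch; the helper name and signature are illustrative, not QLever's actual code:

#include <cstdint>

// With LIMIT 1000 and 100 result rows accumulated so far, every row of the
// next child is multiplied by those 100 rows, so only about 1000 / 100 = 10
// of its rows can contribute. The "+ 1" keeps the bound safe where integer
// division rounds down (e.g. LIMIT 1000 with 300 rows so far: 3 + 1 = 4).
uint64_t limitForNextChild(uint64_t limit, uint64_t rowsSoFar) {
  return limit / rowsSoFar + 1;
}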
6 changes: 1 addition & 5 deletions src/engine/CartesianProductJoin.h
@@ -2,9 +2,7 @@
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach <[email protected]>

-#ifndef QLEVER_CARTESIANPRODUCTJOIN_H
-#define QLEVER_CARTESIANPRODUCTJOIN_H
-
+#pragma once
#include "engine/Operation.h"
#include "engine/QueryExecutionTree.h"

@@ -92,5 +90,3 @@ class CartesianProductJoin : public Operation {
      std::span<const Id> inputColumn, size_t groupSize,
      size_t offset);
};
-
-#endif // QLEVER_CARTESIANPRODUCTJOIN_H
30 changes: 30 additions & 0 deletions src/engine/Engine.cpp
@@ -5,6 +5,7 @@
#include "engine/Engine.h"

#include "engine/CallFixedSize.h"
+#include "util/ChunkedForLoop.h"

// The actual implementation of sorting an `IdTable` according to the
// `sortCols`.
@@ -49,3 +50,32 @@ void Engine::sort(IdTable& idTable, const std::vector<ColumnIndex>& sortCols) {
    });
  }
}
+
+// ___________________________________________________________________________
+size_t Engine::countDistinct(const IdTable& input,
+                             const std::function<void()>& checkCancellation) {
+  AD_EXPENSIVE_CHECK(
+      std::ranges::is_sorted(input, std::ranges::lexicographical_compare),
+      "Input to Engine::countDistinct must be sorted");
+  if (input.empty()) {
+    return 0;
+  }
+  // Store whether the `i`-th entry in the `input` is equal to the `i+1`-th
+  // entry in the columns that have already been checked.
+  std::vector<char, ad_utility::AllocatorWithLimit<char>> counter(
+      input.numRows() - 1, static_cast<char>(true), input.getAllocator());
+
+  // For each column, set the entries in `counter` to 0 where there's a
+  // mismatch.
+  for (const auto& col : input.getColumns()) {
+    ad_utility::chunkedForLoop<100'000>(
+        0ULL, input.numRows() - 1,
+        [&counter, &col](size_t i) {
+          counter[i] &= static_cast<char>(col[i] == col[i + 1]);
+        },
+        [&checkCancellation]() { checkCancellation(); });
+  }
+
+  auto numDuplicates = std::accumulate(counter.begin(), counter.end(), 0ULL);
+  return input.numRows() - numDuplicates;
+}
7 changes: 7 additions & 0 deletions src/engine/Engine.h
@@ -170,4 +170,11 @@ class Engine {
    *dynResult = std::move(result).toDynamic();
    LOG(DEBUG) << "Distinct done.\n";
  }
+
+  // Return the number of distinct rows in the `input`. The input must have all
+  // duplicates adjacent to each other (e.g. by being sorted), otherwise the
+  // behavior is undefined. `checkCancellation()` is invoked regularly and can
+  // be used to implement a cancellation mechanism that throws on cancellation.
+  static size_t countDistinct(const IdTable& input,
+                              const std::function<void()>& checkCancellation);
};
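The checkCancellation callback declared above is invoked through ad_utility::chunkedForLoop<100'000> in the implementation. A hypothetical sketch of that chunking idea (not the actual contents of util/ChunkedForLoop.h): the body runs for every index, while the potentially throwing callback runs only once per chunk, so the per-iteration overhead stays negligible:

#include <cstddef>
#include <functional>

template <size_t ChunkSize>
void chunkedForLoopSketch(size_t begin, size_t end,
                          const std::function<void(size_t)>& body,
                          const std::function<void()>& afterChunk) {
  size_t i = begin;
  while (i < end) {
    // Process one chunk of at most ChunkSize indices.
    size_t chunkEnd = i + ChunkSize < end ? i + ChunkSize : end;
    for (; i < chunkEnd; ++i) {
      body(i);
    }
    afterChunk();  // e.g. checkCancellation(), which may throw
  }
}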
3 changes: 2 additions & 1 deletion src/engine/ExportQueryExecutionTrees.cpp
@@ -32,7 +32,8 @@ ExportQueryExecutionTrees::constructQueryResultToTriples(
    LimitOffsetClause limitAndOffset, std::shared_ptr<const Result> res,
    CancellationHandle cancellationHandle) {
  for (size_t i : getRowIndices(limitAndOffset, *res)) {
-    ConstructQueryExportContext context{i, *res, qet.getVariableColumns(),
+    ConstructQueryExportContext context{i, res->idTable(), res->localVocab(),
+                                        qet.getVariableColumns(),
                                        qet.getQec()->getIndex()};
    using enum PositionInTriple;
    for (const auto& triple : constructTriples) {
2 changes: 1 addition & 1 deletion src/engine/Filter.cpp
@@ -67,7 +67,7 @@ void Filter::computeFilterImpl(IdTable* outputIdTable,
  sparqlExpression::EvaluationContext evaluationContext(
      *getExecutionContext(), _subtree->getVariableColumns(),
      inputResultTable.idTable(), getExecutionContext()->getAllocator(),
-      inputResultTable.localVocab(), cancellationHandle_);
+      inputResultTable.localVocab(), cancellationHandle_, deadline_);

  // TODO<joka921> This should be a mandatory argument to the EvaluationContext
  // constructor.
12 changes: 8 additions & 4 deletions src/engine/GroupBy.cpp
@@ -77,7 +77,9 @@ string GroupBy::getCacheKeyImpl() const {
}

string GroupBy::getDescriptor() const {
-  // TODO<C++23>:: Use std::views::join_with.
+  if (_groupByVariables.empty()) {
+    return "GroupBy (implicit)";
+  }
  return "GroupBy on " +
         absl::StrJoin(_groupByVariables, " ", &Variable::AbslFormatter);
}
@@ -247,7 +249,7 @@ void GroupBy::doGroupBy(const IdTable& dynInput,
  sparqlExpression::EvaluationContext evaluationContext(
      *getExecutionContext(), _subtree->getVariableColumns(), *inTable,
      getExecutionContext()->getAllocator(), *outLocalVocab,
-      cancellationHandle_);
+      cancellationHandle_, deadline_);

  // In a GROUP BY evaluation, the expressions need to know which variables are
  // grouped, and to which columns the results of the aliases are written. The
@@ -1224,7 +1226,8 @@ void GroupBy::createResultFromHashMap(
  // Initialize evaluation context
  sparqlExpression::EvaluationContext evaluationContext(
      *getExecutionContext(), _subtree->getVariableColumns(), *result,
-      getExecutionContext()->getAllocator(), *localVocab, cancellationHandle_);
+      getExecutionContext()->getAllocator(), *localVocab, cancellationHandle_,
+      deadline_);

  evaluationContext._groupedVariables = ad_utility::HashSet<Variable>{
      _groupByVariables.begin(), _groupByVariables.end()};
@@ -1295,7 +1298,8 @@ void GroupBy::computeGroupByForHashMapOptimization(
  // Initialize evaluation context
  sparqlExpression::EvaluationContext evaluationContext(
      *getExecutionContext(), _subtree->getVariableColumns(), subresult,
-      getExecutionContext()->getAllocator(), *localVocab, cancellationHandle_);
+      getExecutionContext()->getAllocator(), *localVocab, cancellationHandle_,
+      deadline_);

  evaluationContext._groupedVariables = ad_utility::HashSet<Variable>{
      _groupByVariables.begin(), _groupByVariables.end()};

