merge master

ad-freiburg · Oct 17, 2024 · 64b4a84 · 64b4a84
2 parents f1bd87e + 4acbca3
commit 64b4a84
Show file tree

Hide file tree

Showing 20 changed files with 957 additions and 194 deletions.
diff --git a/.github/workflows/sparql-conformance-uploader.yml b/.github/workflows/sparql-conformance-uploader.yml
@@ -0,0 +1,65 @@
+name: Upload conformance tests result
+
+on:
+  workflow_run:
+    workflows: [sparql-conformance]  # Name of the workflow whose runs trigger this one.
+    types:
+      - completed
+
+jobs:
+  upload:
+    env:
+          SERVER_URL: https://qlever.cs.uni-freiburg.de/sparql-conformance-uploader
+          API_KEY: ${{ secrets.SPARQL_CONFORMANCE_TOKEN }}  # Sent as the `x-api-key` header by the submit step.
+    runs-on: ubuntu-latest
+    if: github.event.workflow_run.conclusion == 'success'  # Upload only for successful triggering runs.
+    steps:
+      - name: 'Download artifact'  # Saves the triggering run's `conformance-report` artifact as conformance-report.zip.
+        uses: actions/github-script@v6
+        with:
+          script: |
+            var artifacts = await github.rest.actions.listWorkflowRunArtifacts({
+               owner: context.repo.owner,
+               repo: context.repo.repo,
+               run_id: ${{github.event.workflow_run.id }},
+            });
+            var matchArtifact = artifacts.data.artifacts.filter((artifact) => {
+              return artifact.name == "conformance-report"
+            })[0];
+            var download = await github.rest.actions.downloadArtifact({
+               owner: context.repo.owner,
+               repo: context.repo.repo,
+               artifact_id: matchArtifact.id,
+               archive_format: 'zip',
+            });
+            var fs = require('fs');
+            fs.writeFileSync('${{github.workspace}}/conformance-report.zip', Buffer.from(download.data));
+      - run: unzip conformance-report.zip  # Extracts the metadata files and the report read by the steps below.
+      # Read the metadata files into environment variables for the later steps. NOTE(review): the file contents come from the triggering run's artifact; confirm they are trusted before they are written to $GITHUB_ENV.
+      - name: "Read github event"
+        run: echo "github_event=`cat event`" >> $GITHUB_ENV
+      - name: "Read PR number"
+        run: echo "pr_number=`cat pr`" >> $GITHUB_ENV
+      - name: "Read Github Ref"
+        run: echo "original_github_ref=`cat github_ref`" >> $GITHUB_ENV;
+      - name: "Read Github SHA"
+        run: echo "commit_sha=`cat sha`" >> $GITHUB_ENV;
+      - name: "Read Github Repository"
+        run: echo "original_github_repository=`cat github_repository`" >> $GITHUB_ENV;
+      - name: "Submit data to server"
+        run: |
+            # The artifact-derived values are passed as quoted shell variables (exported via $GITHUB_ENV by the steps above), so they are never pasted into the script text itself.
+            response=$(curl -s -o temp_response.txt -w "%{http_code}" \
+              -H "x-api-key: $API_KEY" \
+              -H "event: $github_event" \
+              -H "sha: $commit_sha" \
+              -H "pr-number: $pr_number" \
+              -F "file=@${commit_sha}.json.bz2" \
+              "$SERVER_URL/upload")
+            echo "Server response:"
+            cat temp_response.txt
+            echo "HTTP Status: $response"
+            if [ "$response" -ne 200 ]; then  # Anything but 200 fails, including curl's "000" when no HTTP response was received.
+              echo "Server did not respond with status 200. Failing the workflow."
+              exit 1
+            fi
diff --git a/.github/workflows/sparql-conformance.yml b/.github/workflows/sparql-conformance.yml
@@ -0,0 +1,86 @@
+name: sparql-conformance
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+  merge_group:
+
+jobs:
+  build:
+    env:
+      compiler: clang
+      compiler-version: 16
+      build-type: Release  # Used for CMAKE_BUILD_TYPE and for `--config` in the build steps.
+      cmake-flags: "-DCMAKE_C_COMPILER=clang-16 -DCMAKE_CXX_COMPILER=clang++-16"  # Passed verbatim to the CMake configure step.
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: "recursive"
+          path: qlever-code  # This repository, including its submodules.
+      - name: Checkout sparql-test-suite-files
+        uses: actions/checkout@v3
+        with:
+          repository: "w3c/rdf-tests"
+          path: sparql-test-suite  # Its sparql/sparql11/ directory is handed to testsuite.py below.
+      - name: Checkout qlever-test-suite
+        uses: actions/checkout@v3
+        with:
+          repository: "ad-freiburg/sparql-conformance"
+          path: qlever-test-suite  # Provides testsuite.py, which is run below.
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+      - name: Install python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install requests
+          pip install rdflib
+      - name: Install dependencies
+        uses: ./qlever-code/.github/workflows/install-dependencies-ubuntu
+      - name: Install compiler
+        uses: ./qlever-code/.github/workflows/install-compiler-ubuntu
+        with:
+          compiler: "clang"  # NOTE(review): duplicates env.compiler above; keep both in sync.
+          compiler-version: "16"  # NOTE(review): duplicates env.compiler-version above; keep both in sync.
+      - name: Create build directory
+        run: mkdir ${{github.workspace}}/qlever-code/build
+      - name: Configure CMake
+        run: cmake -S ${{github.workspace}}/qlever-code/ -B ${{github.workspace}}/qlever-code/build ${{env.cmake-flags}} -DCMAKE_BUILD_TYPE=${{env.build-type}} -DLOGLEVEL=INFO -DUSE_PARALLEL=false
+      - name: Build IndexBuilderMain
+        run: cmake --build ${{github.workspace}}/qlever-code/build --target IndexBuilderMain --config ${{env.build-type}} -- -j $(nproc)
+      - name: Build ServerMain
+        run: cmake --build ${{github.workspace}}/qlever-code/build --target ServerMain  --config ${{env.build-type}} -- -j $(nproc)
+      - name: Execute test suite  # Runs testsuite.py (config, extract, then a run named after github.sha) from the qlever-test-suite checkout.
+        run: |
+          cd qlever-test-suite
+          python testsuite.py config http://0.0.0.0 7001 ${{github.workspace}}/sparql-test-suite/sparql/sparql11/ ${{github.workspace}}/qlever-code/build/ localhost sparql sparql
+          python testsuite.py extract
+          python testsuite.py ${{ github.sha }}
+          cd ..
+      - name: Save workflow information
+        # Note: If you change any of the filenames here, you also have to change them in `sparql-conformance-uploader.yml`, which reads these files from the uploaded artifact.
+        run : |
+          mkdir -p conformance-report
+          echo ${{ github.event_name }} > ./conformance-report/event
+          echo ${{ github.event.number }} > ./conformance-report/pr
+          echo ${{ github.repository }} > ./conformance-report/github_repository
+          echo ${GITHUB_REF} > ./conformance-report/github_ref
+      - name: Save SHA and conformance report if it is a master commit.
+        if: github.event_name == 'push'
+        run : |
+          echo ${{github.sha}} > ./conformance-report/sha
+          mv ${{ github.workspace}}/qlever-test-suite/results/${{ github.sha }}.json.bz2 conformance-report/${{ github.sha }}.json.bz2
+      - name: Save SHA and conformance report if it is a PR.
+        if: github.event_name == 'pull_request'  # NOTE(review): `merge_group` runs match neither this nor the `push` condition, so no `sha` file or report is saved for them; confirm the uploader copes with that.
+        run : |
+          echo ${{github.event.pull_request.head.sha}} > ./conformance-report/sha
+          mv ${{ github.workspace}}/qlever-test-suite/results/${{ github.sha }}.json.bz2 conformance-report/${{ github.event.pull_request.head.sha }}.json.bz2
+      - name: Upload conformance report artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: conformance-report
+          path: conformance-report/
diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp
@@ -81,104 +81,119 @@ std::vector<QueryExecutionTree*> Bind::getChildren() {
 }
 
 // _____________________________________________________________________________
-ProtoResult Bind::computeResult([[maybe_unused]] bool requestLaziness) {
-  using std::endl;
-  LOG(DEBUG) << "Get input to BIND operation..." << endl;
-  std::shared_ptr<const Result> subRes = _subtree->getResult();
-  LOG(DEBUG) << "Got input to Bind operation." << endl;
-  IdTable idTable{getExecutionContext()->getAllocator()};
-
-  idTable.setNumColumns(getResultWidth());
-
-  // Make a deep copy of the local vocab from `subRes` and then add to it (in
-  // case BIND adds a new word or words).
-  //
-  // TODO: In most BIND operations, nothing is added to the local vocabulary, so
-  // it would be more efficient to first share the pointer here (like with
-  // `shareLocalVocabFrom`) and only copy it when a new word is about to be
-  // added. Same for GROUP BY.
-  auto localVocab = subRes->getCopyOfLocalVocab();
-
-  size_t inwidth = subRes->idTable().numColumns();
-  size_t outwidth = getResultWidth();
-
-  CALL_FIXED_SIZE((std::array{inwidth, outwidth}), &Bind::computeExpressionBind,
-                  this, &idTable, &localVocab, *subRes,
-                  _bind._expression.getPimpl());
-
-  LOG(DEBUG) << "BIND result computation done." << endl;
-  return {std::move(idTable), resultSortedOn(), std::move(localVocab)};
+IdTable Bind::cloneSubView(const IdTable& idTable,
+                           const std::pair<size_t, size_t>& subrange) {
+  const auto& [lo, hi] = subrange;  // Half-open row range `[lo, hi)` to copy.
+  IdTable copy(idTable.numColumns(), idTable.getAllocator());
+  copy.resize(hi - lo);  // Same columns and allocator as the input.
+  std::ranges::copy(idTable.begin() + lo, idTable.begin() + hi, copy.begin());
+  return copy;
 }
 
 // _____________________________________________________________________________
-template <size_t IN_WIDTH, size_t OUT_WIDTH>
-void Bind::computeExpressionBind(
-    IdTable* outputIdTable, LocalVocab* outputLocalVocab,
-    const Result& inputResultTable,
-    sparqlExpression::SparqlExpression* expression) const {
+ProtoResult Bind::computeResult(bool requestLaziness) {
+  LOG(DEBUG) << "Get input to BIND operation..." << std::endl;
+  std::shared_ptr<const Result> subRes = _subtree->getResult(requestLaziness);
+  LOG(DEBUG) << "Got input to Bind operation." << std::endl;
+
+  auto applyBind = [this, subRes](IdTable idTable, LocalVocab* localVocab) {
+    return computeExpressionBind(localVocab, std::move(idTable),
+                                 subRes->localVocab(),
+                                 _bind._expression.getPimpl());
+  };  // Applies the BIND to one table; holds its own reference to `subRes`.
+
+  if (subRes->isFullyMaterialized()) {
+    if (requestLaziness && subRes->idTable().size() > CHUNK_SIZE) {
+      auto localVocab =
+          std::make_shared<LocalVocab>(subRes->getCopyOfLocalVocab());
+      auto generator = [](std::shared_ptr<LocalVocab> vocab, auto applyBind,
+                          std::shared_ptr<const Result> result)
+          -> cppcoro::generator<IdTable> {
+        size_t size = result->idTable().size();
+        for (size_t offset = 0; offset < size; offset += CHUNK_SIZE) {  // One chunk of at most `CHUNK_SIZE` rows per step.
+          co_yield applyBind(
+              cloneSubView(result->idTable(),
+                           {offset, std::min(size, offset + CHUNK_SIZE)}),
+              vocab.get());
+        }
+      }(localVocab, std::move(applyBind), std::move(subRes));
+      return {std::move(generator), resultSortedOn(), std::move(localVocab)};
+    }
+    // Fully materialized input that is returned as a single table (laziness
+    // was not requested, or the input has at most `CHUNK_SIZE` rows).
+    //
+    // Make a copy of the local vocab from `subRes` and then add to it (in case
+    // BIND adds new words). Note: The copy of the local vocab is shallow
+    // via `shared_ptr`s, so the following is also efficient if the BIND adds
+    // no new words.
+    LocalVocab localVocab = subRes->getCopyOfLocalVocab();
+    IdTable result = applyBind(subRes->idTable().clone(), &localVocab);
+    LOG(DEBUG) << "BIND result computation done." << std::endl;
+    return {std::move(result), resultSortedOn(), std::move(localVocab)};
+  }
+  auto localVocab = std::make_shared<LocalVocab>();  // Input is not fully materialized: start with an empty vocab.
+  auto generator =
+      [](std::shared_ptr<LocalVocab> vocab, auto applyBind,
+         std::shared_ptr<const Result> result) -> cppcoro::generator<IdTable> {
+    for (IdTable& idTable : result->idTables()) {
+      co_yield applyBind(std::move(idTable), vocab.get());
+    }
+    std::array<const LocalVocab*, 2> vocabs{vocab.get(), &result->localVocab()};
+    *vocab = LocalVocab::merge(std::span{vocabs});  // After the last table: merge in the input's local vocab.
+  }(localVocab, std::move(applyBind), std::move(subRes));
+  return {std::move(generator), resultSortedOn(), std::move(localVocab)};
+}
+
+// _____________________________________________________________________________
+IdTable Bind::computeExpressionBind(
+    LocalVocab* outputLocalVocab, IdTable idTable,
+    const LocalVocab& inputLocalVocab,
+    const sparqlExpression::SparqlExpression* expression) const {
   sparqlExpression::EvaluationContext evaluationContext(
-      *getExecutionContext(), _subtree->getVariableColumns(),
-      inputResultTable.idTable(), getExecutionContext()->getAllocator(),
-      inputResultTable.localVocab(), cancellationHandle_, deadline_);
+      *getExecutionContext(), _subtree->getVariableColumns(), idTable,
+      getExecutionContext()->getAllocator(), inputLocalVocab,
+      cancellationHandle_, deadline_);
 
   sparqlExpression::ExpressionResult expressionResult =
       expression->evaluate(&evaluationContext);
 
-  const auto input = inputResultTable.idTable().asStaticView<IN_WIDTH>();
-  auto output = std::move(*outputIdTable).toStatic<OUT_WIDTH>();
-
-  // first initialize the first columns (they remain identical)
-  const auto inSize = input.size();
-  output.reserve(inSize);
-  const auto inCols = input.numColumns();
-  // copy the input to the first numColumns;
-  for (size_t i = 0; i < inSize; ++i) {
-    output.emplace_back();
-    for (size_t j = 0; j < inCols; ++j) {
-      output(i, j) = input(i, j);
-    }
-    checkCancellation();
-  }
+  idTable.addEmptyColumn();
+  auto outputColumn = idTable.getColumn(idTable.numColumns() - 1);
 
   auto visitor = [&]<sparqlExpression::SingleExpressionResult T>(
                      T&& singleResult) mutable {
     constexpr static bool isVariable = std::is_same_v<T, ::Variable>;
     constexpr static bool isStrongId = std::is_same_v<T, Id>;
 
     if constexpr (isVariable) {
-      auto column =
+      auto columnIndex =
           getInternallyVisibleVariableColumns().at(singleResult).columnIndex_;
-      for (size_t i = 0; i < inSize; ++i) {
-        output(i, inCols) = output(i, column);
-        checkCancellation();
-      }
+      auto inputColumn = idTable.getColumn(columnIndex);
+      AD_CORRECTNESS_CHECK(inputColumn.size() == outputColumn.size());
+      std::ranges::copy(inputColumn, outputColumn.begin());
     } else if constexpr (isStrongId) {
-      for (size_t i = 0; i < inSize; ++i) {
-        output(i, inCols) = singleResult;
-        checkCancellation();
-      }
+      std::ranges::fill(outputColumn, singleResult);
     } else {
       constexpr bool isConstant = sparqlExpression::isConstantResult<T>;
 
       auto resultGenerator = sparqlExpression::detail::makeGenerator(
-          std::forward<T>(singleResult), inSize, &evaluationContext);
+          std::forward<T>(singleResult), outputColumn.size(),
+          &evaluationContext);
 
       if constexpr (isConstant) {
         auto it = resultGenerator.begin();
         if (it != resultGenerator.end()) {
           Id constantId =
               sparqlExpression::detail::constantExpressionResultToId(
                   std::move(*it), *outputLocalVocab);
-          for (size_t i = 0; i < inSize; ++i) {
-            output(i, inCols) = constantId;
-            checkCancellation();
-          }
+          checkCancellation();
+          std::ranges::fill(outputColumn, constantId);
         }
       } else {
         size_t i = 0;
         // We deliberately move the values from the generator.
         for (auto& resultValue : resultGenerator) {
-          output(i, inCols) =
+          outputColumn[i] =
               sparqlExpression::detail::constantExpressionResultToId(
                   std::move(resultValue), *outputLocalVocab);
           i++;
@@ -190,5 +205,5 @@ void Bind::computeExpressionBind(
 
   std::visit(visitor, std::move(expressionResult));
 
-  *outputIdTable = std::move(output).toDynamic();
+  return idTable;
 }