Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

experimental using openMP for encode #257

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ include(CheckCXXCompilerFlag)
set(COMMON_CXX_FLAGS
-pipe
-Wall
-fopenmp
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To do it in a portable way, we should use find_package and use the populated variable.

An example is shown here :

cmake_minimum_required(VERSION 3.9)
project(solver LANGUAGES CXX)

find_package(OpenMP REQUIRED)
add_executable(solver solver.cc)
target_link_libraries(solver PRIVATE OpenMP::OpenMP_CXX)

Note that we may need to bump our minimum CMake version, because OpenMP support was reworked in 3.9 (which is only one year old: I don't know if it's too recent to be readily available in most Linux distributions).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An alternative way:

find_package(OpenMP)
if (OPENMP_FOUND)
    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
endif()

If OpenMP is not found, the OpenMP acceleration #pragmas will be disabled.

Copy link
Contributor

@slaperche-scality slaperche-scality Oct 19, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't always work (hence the revamp in 3.9), see https://cmake.org/Bug/view.php?id=15393

)

# Option for enabling/disabling SIMD flags is for both of debug and release
Expand Down
11 changes: 11 additions & 0 deletions src/fft_2n.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
#ifndef __QUAD_FFT_2N_H__
#define __QUAD_FFT_2N_H__

#include <omp.h>

#include "arith.h"
#include "fft_2.h"
#include "fft_base.h"
Expand Down Expand Up @@ -197,6 +199,7 @@ void Radix2<T>::fft(vec::Vector<T>& output, vec::Vector<T>& input)
const unsigned group_len =
(input_len > data_len) ? len / input_len : len / data_len;

#pragma omp parallel for
for (unsigned idx = 0; idx < input_len; ++idx) {
// set output = scramble(input), i.e. bit reversal ordering
const T a = input.get(idx);
Expand All @@ -205,6 +208,7 @@ void Radix2<T>::fft(vec::Vector<T>& output, vec::Vector<T>& input)
output.set(i, a);
}
}
#pragma omp parallel for
for (unsigned idx = input_len; idx < data_len; ++idx) {
// set output = scramble(input), i.e. bit reversal ordering
const unsigned end = rev[idx] + group_len;
Expand All @@ -216,6 +220,7 @@ void Radix2<T>::fft(vec::Vector<T>& output, vec::Vector<T>& input)
for (unsigned m = group_len; m < len; m *= 2) {
const unsigned doubled_m = 2 * m;
const unsigned ratio = len / doubled_m;
#pragma omp parallel for
for (unsigned j = 0; j < m; ++j) {
const T r = W->get(j * ratio);
for (unsigned i = j; i < len; i += doubled_m) {
Expand Down Expand Up @@ -249,6 +254,7 @@ void Radix2<T>::fft_inv(vec::Vector<T>& output, vec::Vector<T>& input)

for (unsigned m = len / 2; m >= 1; m /= 2) {
unsigned doubled_m = 2 * m;
#pragma omp parallel for
for (unsigned j = 0; j < m; ++j) {
T r = inv_W->get(j * len / doubled_m);
for (unsigned i = j; i < len; i += doubled_m) {
Expand Down Expand Up @@ -290,6 +296,7 @@ void Radix2<T>::fft(vec::Buffers<T>& output, vec::Buffers<T>& input)
const unsigned group_len =
(input_len > data_len) ? len / input_len : len / data_len;

#pragma omp parallel for
for (unsigned idx = 0; idx < input_len; ++idx) {
// set output = scramble(input), i.e. bit reversal ordering
T* a = input.get(idx);
Expand All @@ -298,6 +305,7 @@ void Radix2<T>::fft(vec::Buffers<T>& output, vec::Buffers<T>& input)
output.copy(i, a);
}
}
#pragma omp parallel for
for (unsigned idx = input_len; idx < data_len; ++idx) {
// set output = scramble(input), i.e. bit reversal ordering
const unsigned end = rev[idx] + group_len;
Expand All @@ -308,6 +316,7 @@ void Radix2<T>::fft(vec::Buffers<T>& output, vec::Buffers<T>& input)
// perform butterfly operations
for (unsigned m = group_len; m < len; m *= 2) {
const unsigned doubled_m = 2 * m;
#pragma omp parallel for
for (unsigned j = 0; j < m; ++j) {
const T r = W->get(j * len / doubled_m);
for (unsigned i = j; i < len; i += doubled_m) {
Expand Down Expand Up @@ -344,6 +353,7 @@ void Radix2<T>::fft_inv(vec::Buffers<T>& output, vec::Buffers<T>& input)
bit_rev_permute(output);

unsigned i;
#pragma omp parallel for
for (i = 0; i < input_len; ++i) {
output.copy(i, input.get(i));
}
Expand All @@ -353,6 +363,7 @@ void Radix2<T>::fft_inv(vec::Buffers<T>& output, vec::Buffers<T>& input)

for (unsigned m = len / 2; m >= 1; m /= 2) {
unsigned doubled_m = 2 * m;
#pragma omp parallel for
for (unsigned j = 0; j < m; ++j) {
T r = inv_W->get(j * len / doubled_m);
for (unsigned i = j; i < len; i += doubled_m) {
Expand Down