Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove excessive floating-point divides #4312

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/lstm/functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,9 @@ inline void SoftmaxInPlace(int n, T *inout) {
inout[i] = prob;
}
if (prob_total > 0) {
T inv_prob_total = 1 / prob_total;
for (int i = 0; i < n; i++) {
inout[i] /= prob_total;
inout[i] *= inv_prob_total;
Comment on lines +203 to +205
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this kind of optimization something which a good compiler should do automatically?

}
}
}
Expand Down
13 changes: 8 additions & 5 deletions src/lstm/networkio.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ void NetworkIO::Copy2DImage(int batch, Image pix, float black, float contrast, T
int target_width = stride_map_.Size(FD_WIDTH);
int num_features = NumFeatures();
bool color = num_features == 3;
float inv_contrast = 1 / contrast;
if (width > target_width) {
width = target_width;
}
Expand All @@ -236,11 +237,11 @@ void NetworkIO::Copy2DImage(int batch, Image pix, float black, float contrast, T
int f = 0;
for (int c = COLOR_RED; c <= COLOR_BLUE; ++c) {
int pixel = GET_DATA_BYTE(line + x, c);
SetPixel(t, f++, pixel, black, contrast);
SetPixel(t, f++, pixel, black, inv_contrast);
}
} else {
int pixel = GET_DATA_BYTE(line, x);
SetPixel(t, 0, pixel, black, contrast);
SetPixel(t, 0, pixel, black, inv_contrast);
}
}
}
Expand All @@ -264,6 +265,7 @@ void NetworkIO::Copy1DGreyImage(int batch, Image pix, float black, float contras
index.AddOffset(batch, FD_BATCH);
int t = index.t();
int target_width = stride_map_.Size(FD_WIDTH);
float inv_contrast = 1 / contrast;
if (width > target_width) {
width = target_width;
}
Expand All @@ -272,7 +274,7 @@ void NetworkIO::Copy1DGreyImage(int batch, Image pix, float black, float contras
for (int y = 0; y < height; ++y) {
uint32_t *line = pixGetData(pix) + wpl * y;
int pixel = GET_DATA_BYTE(line, x);
SetPixel(t, y, pixel, black, contrast);
SetPixel(t, y, pixel, black, inv_contrast);
}
}
for (; x < target_width; ++x) {
Expand All @@ -287,8 +289,9 @@ void NetworkIO::Copy1DGreyImage(int batch, Image pix, float black, float contras
// pixel: the value of the pixel from the image (in one channel)
// black: the pixel value to map to the lowest of the range of *this
// contrast: the range of pixel values to stretch to half the range of *this.
void NetworkIO::SetPixel(int t, int f, int pixel, float black, float contrast) {
float float_pixel = (pixel - black) / contrast - 1.0f;
// inv_contrast: the reciprocal of the contrast (1 / contrast), so the pixel is scaled with a multiply instead of a divide.
void NetworkIO::SetPixel(int t, int f, int pixel, float black, float inv_contrast) {
float float_pixel = (pixel - black) * inv_contrast - 1.0f;
if (int_mode_) {
i_[t][f] = ClipToRange<int>(IntCastRounded((INT8_MAX + 1) * float_pixel), -INT8_MAX, INT8_MAX);
} else {
Expand Down
3 changes: 2 additions & 1 deletion src/lstm/networkio.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ class TESS_API NetworkIO {
// pixel: the value of the pixel from the image (in one channel)
// black: the pixel value to map to the lowest of the range of *this
// contrast: the range of pixel values to stretch to half the range of *this.
void SetPixel(int t, int f, int pixel, float black, float contrast);
// inv_contrast: the reciprocal of the contrast (1 / contrast); the pixel is scaled by multiplying with it rather than dividing by the contrast.
void SetPixel(int t, int f, int pixel, float black, float inv_contrast);
// Converts the array to a Pix. Must be pixDestroyed after use.
Image ToPix() const;
// Prints the first and last num timesteps of the array for each feature.
Expand Down
9 changes: 6 additions & 3 deletions src/textord/pithsync.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ void FPCUTPT::assign( // constructor
// half of pitch
int16_t half_pitch = pitch / 2 - 1;
uint32_t lead_flag; // new flag
float inv_projection_scale = 1 / projection_scale;

if (half_pitch > 31) {
half_pitch = 31;
Expand Down Expand Up @@ -166,7 +167,7 @@ void FPCUTPT::assign( // constructor
}
}
balance_count =
static_cast<int16_t>(balance_count * textord_balance_factor / projection_scale);
static_cast<int16_t>(balance_count * textord_balance_factor * inv_projection_scale);
}
r_index = segpt->region_index + 1;
total = segpt->mean_sum + dist;
Expand Down Expand Up @@ -221,6 +222,7 @@ void FPCUTPT::assign_cheap( // constructor
// half of pitch
int16_t half_pitch = pitch / 2 - 1;
uint32_t lead_flag; // new flag
float inv_projection_scale = 1 / projection_scale;

if (half_pitch > 31) {
half_pitch = 31;
Expand Down Expand Up @@ -260,7 +262,7 @@ void FPCUTPT::assign_cheap( // constructor
lead_flag &= lead_flag - 1;
}
balance_count =
static_cast<int16_t>(balance_count * textord_balance_factor / projection_scale);
static_cast<int16_t>(balance_count * textord_balance_factor * inv_projection_scale);
}
r_index = segpt->region_index + 1;
total = segpt->mean_sum + dist;
Expand Down Expand Up @@ -511,6 +513,7 @@ double check_pitch_sync3( // find segmentation
int16_t best_fake; // best fake level
int16_t best_count; // no of cuts
FPSEGPT_IT seg_it = seg_list; // output iterator
float inv_projection_scale = 1 / projection_scale;

end = (end - start) % pitch;
if (pitch < 3) {
Expand Down Expand Up @@ -597,7 +600,7 @@ double check_pitch_sync3( // find segmentation
offset = projection->pile_count(x);
faking = true;
} else {
projection_offset = static_cast<int16_t>(projection->pile_count(x) / projection_scale);
projection_offset = static_cast<int16_t>(projection->pile_count(x) * inv_projection_scale);
if (projection_offset > offset) {
offset = projection_offset;
}
Expand Down
Loading