I want to optimize this code to run faster, so I started by parallelizing the for
loops, but the code is still not very fast. The frame rate varies, but the maximum is about 23 FPS. The code manipulates the histogram of a given image. Is there a faster and more efficient way of doing it?
#include <algorithm>
#include <array>
#include <iostream>
#include <mutex>
#include <vector>

#include <opencv2/opencv.hpp>
// Forward declaration: hist() is defined after main() and rewrites the
// pixels of `image` in place.
void hist(cv::Mat& image);
int main() {
// Read the image
cv::Mat image = cv::imread("image.jpg");
if (image.empty()) {
std::cerr << "Error: Could not open or find the image!" << std::endl;
return -1;
}
// Call the hist function
hist(image);
// Display the result
cv::imshow("Result", image);
cv::waitKey(0);
return 0;
}
/// @brief Per-channel contrast stretch with percentile clipping, in place.
///
/// For each of the three 8-bit channels a cumulative histogram is built and
/// used to pick a lower bound `vmin` and an upper bound `vmax` that cut off
/// roughly the darkest 3% and the brightest 3% of the pixels. Every value is
/// then clamped to [vmin, vmax] and rescaled linearly to [0, 255].
///
/// @param image Matrix of type CV_8UC3; modified in place. An empty matrix is
///              left untouched.
/// @throws cv::Exception (via CV_Assert) if a non-empty `image` is not CV_8UC3.
///
/// Notes on the implementation:
///  - Histogram counting runs in parallel, but each stripe counts into its own
///    private table and merges it under a mutex, so no counter is shared
///    between threads while it is being incremented.
///  - Clamp + rescale depend only on the 8-bit input value, so they are
///    precomputed once into a 256-entry table and applied with cv::LUT in a
///    single pass instead of two per-pixel arithmetic passes.
///  - A channel whose bounds collapse (vmax <= vmin, e.g. an almost fully
///    saturated channel) is left unchanged rather than dividing by zero.
void hist(cv::Mat& image) {
    if (image.empty()) {
        return;  // no pixels: nothing to stretch, and the bound search needs N > 0
    }
    // The pixel access below reinterprets rows as Vec3b, which is only valid
    // for 3-channel 8-bit data.
    CV_Assert(image.type() == CV_8UC3);

    constexpr int kBins = 256;
    constexpr int kChannels = 3;
    using Histogram = std::array<long long, kBins>;
    using ChannelHistograms = std::array<Histogram, kChannels>;

    // Weight of the previous bin when accumulating; 1 yields a plain
    // cumulative histogram.
    const int filterFactor = 1;

    // Widen before multiplying so very large images cannot overflow int.
    const long long N = static_cast<long long>(image.rows) * image.cols;

    // --- 1. Per-channel histograms (parallel, race-free) ---------------------
    ChannelHistograms histograms{};  // value-initialised: all bins are zero
    std::mutex mergeMutex;
    cv::parallel_for_(cv::Range(0, image.rows), [&](const cv::Range& range) {
        ChannelHistograms local{};  // private to this stripe
        for (int i = range.start; i < range.end; ++i) {
            const cv::Vec3b* row = image.ptr<cv::Vec3b>(i);
            for (int j = 0; j < image.cols; ++j) {
                const cv::Vec3b& px = row[j];
                ++local[0][px[0]];
                ++local[1][px[1]];
                ++local[2][px[2]];
            }
        }
        // Merge once per stripe; the lock is held only for 3 * 256 additions.
        std::lock_guard<std::mutex> lock(mergeMutex);
        for (int c = 0; c < kChannels; ++c) {
            for (int v = 0; v < kBins; ++v) {
                histograms[c][v] += local[c][v];
            }
        }
    });

    // --- 2. Turn the histograms into cumulative histograms -------------------
    for (int c = 0; c < kChannels; ++c) {
        for (int v = 1; v < kBins; ++v) {
            histograms[c][v] += filterFactor * histograms[c][v - 1];
        }
    }

    // --- 3. Clipping bounds ---------------------------------------------------
    const long long lowCut = N * 3 / 100;          // ~3% of all pixels
    const long long highCut = N - (N / 100) * 3;   // ~97% of all pixels

    // Finds [vmin, vmax] for one cumulative histogram. Both scans are bounded
    // so they can never index outside [0, kBins - 1].
    const auto findBounds = [&](const Histogram& cdf, int& vmin, int& vmax) {
        vmin = 0;
        vmax = kBins - 2;
        while (vmin < kBins - 2 && cdf[vmin + 1] <= lowCut) {
            ++vmin;
        }
        while (vmax > 0 && cdf[vmax - 1] > highCut) {
            --vmax;
        }
        if (vmax < kBins - 2) {
            ++vmax;
        }
    };

    // --- 4. Build the 256-entry lookup table (clamp + rescale) ---------------
    cv::Mat lut(1, kBins, CV_8UC3);
    cv::Vec3b* table = lut.ptr<cv::Vec3b>(0);
    for (int c = 0; c < kChannels; ++c) {
        int vmin = 0;
        int vmax = 0;
        findBounds(histograms[c], vmin, vmax);
        const int span = vmax - vmin;

        for (int v = 0; v < kBins; ++v) {
            if (span <= 0) {
                // Degenerate range: stretching is undefined, keep the value.
                table[v][c] = static_cast<uchar>(v);
            } else {
                const int clamped = std::max(std::min(v, vmax), vmin);
                table[v][c] = cv::saturate_cast<uchar>((clamped - vmin) * 255 / span);
            }
        }
    }

    // --- 5. Apply the table to every pixel in one pass -----------------------
    cv::LUT(image, lut, image);
}