This code performs a multithreaded matrix multiplication. It automatically scales to the number of available cores. The code is written in C++ and requires Qt and C++11.
[pastacode lang="c++" message="" highlight="" provider="manual"]#include <QVector>
#include <QtConcurrent>
#include <QThreadPool>
#include <QReadWriteLock>
#include <functional>
/**
 * Computes the inner (dot) product of two vectors of equal length.
 *
 * @param vector1 left operand
 * @param vector2 right operand; must have the same number of elements as vector1
 * @return the sum of vector1[i] * vector2[i] over all i, expressed in the type
 *         of T() * U(); a value-initialised result when both vectors are empty
 *
 * A size mismatch is reported through Q_ASSERT_X (which Qt compiles out when
 * QT_NO_DEBUG is defined), so callers must guarantee matching sizes themselves.
 */
template <typename T, typename U>
auto innerProduct(QVector<T> const &vector1, QVector<U> const &vector2) -> decltype(T() * U())
{
    Q_ASSERT_X(vector1.size() == vector2.size(), "template <typename T, typename U> auto innerProduct(QVector<T> const &vector1, QVector<U> const &vector2) -> decltype(T() * U())", qPrintable("incompatible sizes of vectors, size of vector1 = " + QString::number(vector1.size()) + ", size of vector2 = " + QString::number(vector2.size())));

    // Accumulate in the product type itself rather than in double, so that the
    // result is not routed through a floating-point conversion (wide integers
    // would lose precision, and types not convertible to double would not compile).
    typedef decltype(T() * U()) Product;
    Product sum = Product();
    for (int i = 0; i < vector1.size(); ++i)
        sum += vector1.at(i) * vector2.at(i);
    return sum;
}
/**
 * Worker that fills a subset of the rows of resultMatrix.
 *
 * For every index k in indexesToProcess, it fetches
 * MatrixOperations::columnVector(k, matrix2) (presumably column k of matrix2;
 * the helper is defined elsewhere) and stores in resultMatrix[k][j] the inner
 * product of row j of matrix1 with that vector. The result is therefore laid
 * out with one row per processed column index.
 *
 * @param indexesToProcess indexes handled by this call; each must be a valid
 *                         row index of resultMatrix
 * @param matrix1          left operand, stored as a vector of rows
 * @param matrix2          right operand
 * @param resultMatrix     output; row k must already hold at least
 *                         matrix1.size() elements
 *
 * Each call writes only to the rows of resultMatrix named in indexesToProcess.
 */
template <typename T, typename U>
void _multiplicationAuxiliaryFunction(QVector<int> const &indexesToProcess, QVector<QVector<T> > const &matrix1, QVector<QVector<U> > const &matrix2, QVector<QVector<decltype(T() * U())> > &resultMatrix)
{
    for (int i = 0; i < indexesToProcess.size(); ++i) {
        const int currentIndex = indexesToProcess.at(i);
        // Extract the column once so it is reused for every row of matrix1.
        const QVector<U> currentColumnIndexOfMatrix2 = MatrixOperations::columnVector(currentIndex, matrix2);
        // Bind by reference: the products must land in resultMatrix, not in a copy.
        QVector<decltype(T() * U())> &currentResultVector(resultMatrix[currentIndex]);
        for (int j = 0; j < matrix1.size(); ++j)
            currentResultVector[j] = innerProduct(matrix1.at(j), currentColumnIndexOfMatrix2);
    }
}
/**
 * Multiplies two matrices, spreading the work over the global QThreadPool.
 *
 * Matrices are stored as a QVector of rows. The column indexes of matrix2 are
 * dealt round-robin into one bucket per pool thread, and every bucket is
 * processed by _multiplicationAuxiliaryFunction through
 * QtConcurrent::blockingMap, which returns only once all buckets are done.
 *
 * @param matrix1 left operand; the length of its first row must equal
 *                matrix2.size() (only the first row is checked)
 * @param matrix2 right operand
 * @return the matrix left in resultMatrix after
 *         MatrixOperations::inplaceTranspose; presumably matrix1.size() rows by
 *         matrix2.at(0).size() columns — the helper is defined elsewhere
 *
 * Incompatible dimensions are reported through Q_ASSERT_X (which Qt compiles
 * out when QT_NO_DEBUG is defined).
 */
template <typename T, typename U>
auto multiplication(QVector<QVector<T> > const &matrix1, QVector<QVector<U> > const &matrix2) -> QVector<QVector<decltype(T() * U())> >
{
    Q_ASSERT_X((matrix1.isEmpty() ? 0 : matrix1.at(0).size()) == matrix2.size(), "template <typename T, typename U> auto multiplication(QVector<QVector<T> > const &matrix1, QVector<QVector<U> > const &matrix2) -> QVector<QVector<decltype(T() * U())> >", qPrintable("incompatible sizes of matrices, column size of matrix1 = " + QString::number((matrix1.isEmpty() ? 0 : matrix1.at(0).size())) + ", row size of matrix2 = " + QString::number(matrix2.size())));

    typedef decltype(T() * U()) Product;

    // maxThreadCount() may have been configured to zero or a negative value;
    // clamp it so the modulo below and the scheduler size stay valid.
    const int maximumThreadCount = qMax(1, QThreadPool::globalInstance()->maxThreadCount());
    const int n = matrix2.isEmpty() ? 0 : matrix2.at(0).size();

    // Built with one row per column of the product, so that each worker writes
    // whole rows; the layout is flipped after the parallel phase.
    QVector<QVector<Product> > resultMatrix(n, QVector<Product>(matrix1.size()));

    // Round-robin distribution of column indexes over the buckets.
    QVector<QVector<int> > scheduler(maximumThreadCount);
    for (int i = 0; i < n; ++i)
        scheduler[i % maximumThreadCount] << i;

    // The operands are passed by reference wrapper so std::bind does not copy them.
    QtConcurrent::blockingMap(scheduler, std::bind(_multiplicationAuxiliaryFunction<T, U>, std::placeholders::_1, std::cref(matrix1), std::cref(matrix2), std::ref(resultMatrix)));

    MatrixOperations::inplaceTranspose(resultMatrix);
    return resultMatrix;
}
[/pastacode]