![]() |
Blitz Support : |
From: Xianglong Yuan (yuanx_at_[hidden])
Date: 2004-08-18 07:15:24
Thank Patrick and Toby for your help. I first tried Toby's method,
and the compiler failed at
res = sum(m1(i,k) * m2(k,j), k);
but the compilation does go through once the m1 and m2 are not
Matrix<double> type, but Array<double,2> type. By following the
matmult.cpp example, as being suggested by Patrick, I did succeed in
matrix multiplication with Blitz.
The reason I'm interested in Blitz is its performance. Therefore, I
compared the Blitz array implementation, which is
assert(A2.columns() == M2.rows());
firstIndex i;
secondIndex j;
thirdIndex k;
t_start = clock();
B2 = sum(A2(i,k) * M2(k,j), k);
t_stop = clock();
with a plain array implementation, which is
assert(COLS_A == ROWS_M);
t_start = clock();
for (int i = 0; i < ROWS_A; i++)
for (int j = 0; j < COLS_M; j++) {
B1[i*COLS_M+j] = 0.0;
for (int k = 0; k < COLS_A; k++)
B1[i*COLS_M+j] += A1[i*COLS_A+k] * M1[k*COLS_M+j];
}
t_stop = clock();
The result is a surprising 100:26, which means a plain array
implementation is 4-times faster than the Blitz. Here, I use GNU
compiler gcc 3.3.1 with -O3 flag. The Blitz++-0.7 library was
compiled using the same compiler with --enable-optimize option.
The test is run on a 1GB memory computer with no memory swapping.
Do I miss something here? If the Blitz implementation cannot be
faster than a plain array implementation, there is no point for me to
use the Blitz.
I attached my blitz_test.cpp below and you can check the result by
yourselves. If there is anything I can do to improve the performance,
please let me know. Your comments and suggestions are appreciated.
Xianglong
=========================================================
// blitz_test.cpp
#include <blitz/array.h>
#include <ctime>
#include <iostream>
using namespace std;
int main()
{
static const int ROWS_A = 2000000;
static const int COLS_A = 3;
static const int COLS_M = 3;
static const int ROWS_M = COLS_A;
clock_t t_start, t_stop;
double* orig_M = new double[ROWS_M*COLS_M];
double* orig_A = new double[ROWS_A*COLS_A];
for (int i = 0; i < ROWS_M*COLS_M; i++)
orig_M[i] = 20.0 * random()/(RAND_MAX+1.0);
for (int i = 0; i < ROWS_A*COLS_A; i++)
orig_A[i] = random()/(RAND_MAX+1.0);
//* start plain array
double* M1 = new double[ROWS_M*COLS_M];
double* A1 = new double[ROWS_A*COLS_A];
for (int i = 0; i < ROWS_M; i++)
for (int j = 0; j < COLS_M; j++)
M1[i*COLS_M+j] = orig_M[i*COLS_M+j];
for (int i = 0; i < ROWS_A; i++)
for (int j = 0; j < COLS_A; j++)
A1[i*COLS_A+j] = orig_A[i*COLS_A+j];
double* B1 = new double[ROWS_A*COLS_M];
assert(COLS_A == ROWS_M);
t_start = clock();
for (int i = 0; i < ROWS_A; i++)
for (int j = 0; j < COLS_M; j++) {
B1[i*COLS_M+j] = 0.0;
for (int k = 0; k < COLS_A; k++)
B1[i*COLS_M+j] += A1[i*COLS_A+k] * M1[k*COLS_M+j];
}
t_stop = clock();
cout << "plain array: " << t_stop - t_start << endl;
delete[] M1, A1, B1;
// end plain array */
//* start blitz array
using namespace blitz;
Range rM(0, ROWS_M-1), cM(0, COLS_M-1);
Range rA(0, ROWS_A-1), cA(0, COLS_A-1);
Array<double, 2> M2(rM, cM), A2(rA, cA);
for (int i = 0; i < ROWS_M; i++)
for (int j = 0; j < COLS_M; j++)
M2(i,j) = orig_M[i*3+j];
for (int i = 0; i < ROWS_A; i++)
for (int j = 0; j < COLS_A; j++)
A2(i,j) = orig_A[i*3+j];
Array<double, 2> B2(rA, cM);
assert(A2.columns() == M2.rows());
firstIndex i;
secondIndex j;
thirdIndex k;
t_start = clock();
B2 = sum(A2(i,k) * M2(k,j), k);
t_stop = clock();
cout << "blitz array: " << t_stop - t_start << endl;
// end blitz array */
return 0;
}