![]() |
Blitz Support : |
From: Xavier WARIN(Compte LOCAL) - I23 (xavier.warin_at_[hidden])
Date: 2004-08-19 01:16:08
I forgot to join the file !!
Xianglong Yuan wrote:
> Thank Patrick and Toby for your help. I first tried Toby's method,
> and the compiler failed at
>
> res = sum(m1(i,k) * m2(k,j), k);
>
> but the compilation does go through once the m1 and m2 are not
> Matrix<double> type, but Array<double,2> type. By following the
> matmult.cpp example, as being suggested by Patrick, I did succeed in
> matrix multiplication with Blitz.
>
> The reason I'm interested in Blitz is its performance. Therefore, I
> compared the Blitz array implementation, which is
>
> assert(A2.columns() == M2.rows());
> firstIndex i;
> secondIndex j;
> thirdIndex k;
> t_start = clock();
> B2 = sum(A2(i,k) * M2(k,j), k);
> t_stop = clock();
>
> with a plain array implementation, which is
>
> assert(COLS_A == ROWS_M);
> t_start = clock();
> for (int i = 0; i < ROWS_A; i++)
> for (int j = 0; j < COLS_M; j++) {
> B1[i*COLS_M+j] = 0.0;
> for (int k = 0; k < COLS_A; k++)
> B1[i*COLS_M+j] += A1[i*COLS_A+k] * M1[k*COLS_M+j];
> }
> t_stop = clock();
>
> The result is a surprising 100:26, which means a plain array
> implementation is 4-times faster than the Blitz. Here, I use GNU
> compiler gcc 3.3.1 with -O3 flag. The Blitz++-0.7 library was
> compiled using the same compiler with --enable-optimize option.
> The test is run on a 1GB memory computer with no memory swapping.
>
> Do I miss something here? If the Blitz implementation cannot be
> faster than a plain array implementation, there is no point for me to
> use the Blitz.
>
> I attached my blitz_test.cpp below and you can check the result by
> yourselves. If there is anything I can do to improve the performance,
> please let me know. Your comments and suggestions are appreciated.
>
> Xianglong
>
> =========================================================
> // blitz_test.cpp
>
> #include <blitz/array.h>
> #include <ctime>
> #include <iostream>
> using namespace std;
>
> int main()
> {
> static const int ROWS_A = 2000000;
> static const int COLS_A = 3;
> static const int COLS_M = 3;
> static const int ROWS_M = COLS_A;
> clock_t t_start, t_stop;
>
> double* orig_M = new double[ROWS_M*COLS_M];
> double* orig_A = new double[ROWS_A*COLS_A];
>
> for (int i = 0; i < ROWS_M*COLS_M; i++)
> orig_M[i] = 20.0 * random()/(RAND_MAX+1.0);
> for (int i = 0; i < ROWS_A*COLS_A; i++)
> orig_A[i] = random()/(RAND_MAX+1.0);
>
> //* start plain array
>
> double* M1 = new double[ROWS_M*COLS_M];
> double* A1 = new double[ROWS_A*COLS_A];
>
> for (int i = 0; i < ROWS_M; i++)
> for (int j = 0; j < COLS_M; j++)
> M1[i*COLS_M+j] = orig_M[i*COLS_M+j];
>
> for (int i = 0; i < ROWS_A; i++)
> for (int j = 0; j < COLS_A; j++)
> A1[i*COLS_A+j] = orig_A[i*COLS_A+j];
>
> double* B1 = new double[ROWS_A*COLS_M];
> assert(COLS_A == ROWS_M);
>
> t_start = clock();
>
> for (int i = 0; i < ROWS_A; i++)
> for (int j = 0; j < COLS_M; j++) {
> B1[i*COLS_M+j] = 0.0;
> for (int k = 0; k < COLS_A; k++)
> B1[i*COLS_M+j] += A1[i*COLS_A+k] * M1[k*COLS_M+j];
> }
>
> t_stop = clock();
> cout << "plain array: " << t_stop - t_start << endl;
>
> delete[] M1, A1, B1;
>
> // end plain array */
>
> //* start blitz array
>
> using namespace blitz;
>
> Range rM(0, ROWS_M-1), cM(0, COLS_M-1);
> Range rA(0, ROWS_A-1), cA(0, COLS_A-1);
> Array<double, 2> M2(rM, cM), A2(rA, cA);
>
> for (int i = 0; i < ROWS_M; i++)
> for (int j = 0; j < COLS_M; j++)
> M2(i,j) = orig_M[i*3+j];
>
> for (int i = 0; i < ROWS_A; i++)
> for (int j = 0; j < COLS_A; j++)
> A2(i,j) = orig_A[i*3+j];
>
> Array<double, 2> B2(rA, cM);
> assert(A2.columns() == M2.rows());
>
> firstIndex i;
> secondIndex j;
> thirdIndex k;
>
> t_start = clock();
>
> B2 = sum(A2(i,k) * M2(k,j), k);
>
> t_stop = clock();
> cout << "blitz array: " << t_stop - t_start << endl;
>
> // end blitz array */
>
> return 0;
> }
>
>
> _______________________________________________
> Blitz-support mailing list
> Blitz-support_at_[hidden]
> http://www.oonumerics.org/mailman/listinfo.cgi/blitz-support
>
>
// blitz_test.cpp
#include <blitz/array.h>
#include <ctime>
#include <iostream>
using namespace std;
int main()
{
static const int ROWS_A = 2000000;
static const int COLS_A = 3;
static const int COLS_M = 3;
static const int ROWS_M = COLS_A;
clock_t t_start, t_stop;
double* orig_M = new double[ROWS_M*COLS_M];
double* orig_A = new double[ROWS_A*COLS_A];
for (int i = 0; i < ROWS_M*COLS_M; i++)
orig_M[i] = 20.0 * random()/(RAND_MAX+1.0);
for (int i = 0; i < ROWS_A*COLS_A; i++)
orig_A[i] = random()/(RAND_MAX+1.0);
//* start plain array
double* M1 = new double[ROWS_M*COLS_M];
double* A1 = new double[ROWS_A*COLS_A];
for (int i = 0; i < ROWS_M; i++)
for (int j = 0; j < COLS_M; j++)
M1[i*COLS_M+j] = orig_M[i*COLS_M+j];
for (int i = 0; i < ROWS_A; i++)
for (int j = 0; j < COLS_A; j++)
A1[i*COLS_A+j] = orig_A[i*COLS_A+j];
double* B1 = new double[ROWS_A*COLS_M];
assert(COLS_A == ROWS_M);
t_start = clock();
for (int i = 0; i < ROWS_A; i++)
for (int j = 0; j < COLS_M; j++) {
B1[i*COLS_M+j] = 0.0;
for (int k = 0; k < COLS_A; k++)
B1[i*COLS_M+j] += A1[i*COLS_A+k] * M1[k*COLS_M+j];
}
t_stop = clock();
cout << "plain array : " << t_stop - t_start << endl;
t_start = clock();
for (int i = 0; i < ROWS_A; ++i)
for (int j = 0; j < COLS_M; ++j) {
double b1_loc = 0.0;
for (int k = 0; k < COLS_A; ++k)
b1_loc += A1[i*COLS_A+k] * M1[k*COLS_M+j];
B1[i*COLS_M+j] = b1_loc ;
}
t_stop = clock();
cout << "plain array 2 : " << t_stop - t_start << endl;
delete[] M1, A1, B1;
// end plain array */
//* start blitz array
using namespace blitz;
Range rM(0, ROWS_M-1), cM(0, COLS_M-1);
Range rA(0, ROWS_A-1), cA(0, COLS_A-1);
Array<double, 2> M2(rM, cM), A2(rA, cA);
for (int i = 0; i < ROWS_M; i++)
for (int j = 0; j < COLS_M; j++)
M2(i,j) = orig_M[i*3+j];
for (int i = 0; i < ROWS_A; i++)
for (int j = 0; j < COLS_A; j++)
A2(i,j) = orig_A[i*3+j];
Array<double, 2> B2(rA, cM);
assert(A2.columns() == M2.rows());
firstIndex i;
secondIndex j;
thirdIndex k;
t_start = clock();
B2 = sum(A2(i,k) * M2(k,j), k);
t_stop = clock();
cout << "blitz array : " << t_stop - t_start << " sum " << sum(B2) << endl;
t_start = clock();
for (int i = 0; i < ROWS_A; ++i)
for (int j = 0; j < COLS_M; ++j)
{
double B1_loc = 0. ;
for (int k = 0; k < COLS_A; ++k)
B1_loc += A2(i,k) * M2(k,j);
B2(i,j) = B1_loc;
}
t_stop = clock();
cout << "blitz array 2 : " << t_stop - t_start << " sum " << sum(B2) <<endl;
// end blitz array */
return 0;
}