Blitz logo

Blitz Support :

From: Xavier WARIN(Compte LOCAL) - I23 (xavier.warin_at_[hidden])
Date: 2004-08-19 01:16:08


I forgot to join the file !!

Xianglong Yuan wrote:
> Thank Patrick and Toby for your help. I first tried Toby's method,
> and the compiler failed at
>
> res = sum(m1(i,k) * m2(k,j), k);
>
> but the compilation does go through once the m1 and m2 are not
> Matrix<double> type, but Array<double,2> type. By following the
> matmult.cpp example, as being suggested by Patrick, I did succeed in
> matrix multiplication with Blitz.
>
> The reason I'm interested in Blitz is its performance. Therefore, I
> compared the Blitz array implementation, which is
>
> assert(A2.columns() == M2.rows());
> firstIndex i;
> secondIndex j;
> thirdIndex k;
> t_start = clock();
> B2 = sum(A2(i,k) * M2(k,j), k);
> t_stop = clock();
>
> with a plain array implementation, which is
>
> assert(COLS_A == ROWS_M);
> t_start = clock();
> for (int i = 0; i < ROWS_A; i++)
> for (int j = 0; j < COLS_M; j++) {
> B1[i*COLS_M+j] = 0.0;
> for (int k = 0; k < COLS_A; k++)
> B1[i*COLS_M+j] += A1[i*COLS_A+k] * M1[k*COLS_M+j];
> }
> t_stop = clock();
>
> The result is a surprising 100:26, which means a plain array
> implementation is 4-times faster than the Blitz. Here, I use GNU
> compiler gcc 3.3.1 with -O3 flag. The Blitz++-0.7 library was
> compiled using the same compiler with --enable-optimize option.
> The test is run on a 1GB memory computer with no memory swapping.
>
> Do I miss something here? If the Blitz implementation cannot be
> faster than a plain array implementation, there is no point for me to
> use the Blitz.
>
> I attached my blitz_test.cpp below and you can check the result by
> yourselves. If there is anything I can do to improve the performance,
> please let me know. Your comments and suggestions are appreciated.
>
> Xianglong
>
> =========================================================
> // blitz_test.cpp
>
> #include <blitz/array.h>
> #include <ctime>
> #include <iostream>
> using namespace std;
>
> int main()
> {
> static const int ROWS_A = 2000000;
> static const int COLS_A = 3;
> static const int COLS_M = 3;
> static const int ROWS_M = COLS_A;
> clock_t t_start, t_stop;
>
> double* orig_M = new double[ROWS_M*COLS_M];
> double* orig_A = new double[ROWS_A*COLS_A];
>
> for (int i = 0; i < ROWS_M*COLS_M; i++)
> orig_M[i] = 20.0 * random()/(RAND_MAX+1.0);
> for (int i = 0; i < ROWS_A*COLS_A; i++)
> orig_A[i] = random()/(RAND_MAX+1.0);
>
> //* start plain array
>
> double* M1 = new double[ROWS_M*COLS_M];
> double* A1 = new double[ROWS_A*COLS_A];
>
> for (int i = 0; i < ROWS_M; i++)
> for (int j = 0; j < COLS_M; j++)
> M1[i*COLS_M+j] = orig_M[i*COLS_M+j];
>
> for (int i = 0; i < ROWS_A; i++)
> for (int j = 0; j < COLS_A; j++)
> A1[i*COLS_A+j] = orig_A[i*COLS_A+j];
>
> double* B1 = new double[ROWS_A*COLS_M];
> assert(COLS_A == ROWS_M);
>
> t_start = clock();
>
> for (int i = 0; i < ROWS_A; i++)
> for (int j = 0; j < COLS_M; j++) {
> B1[i*COLS_M+j] = 0.0;
> for (int k = 0; k < COLS_A; k++)
> B1[i*COLS_M+j] += A1[i*COLS_A+k] * M1[k*COLS_M+j];
> }
>
> t_stop = clock();
> cout << "plain array: " << t_stop - t_start << endl;
>
> delete[] M1, A1, B1;
>
> // end plain array */
>
> //* start blitz array
>
> using namespace blitz;
>
> Range rM(0, ROWS_M-1), cM(0, COLS_M-1);
> Range rA(0, ROWS_A-1), cA(0, COLS_A-1);
> Array<double, 2> M2(rM, cM), A2(rA, cA);
>
> for (int i = 0; i < ROWS_M; i++)
> for (int j = 0; j < COLS_M; j++)
> M2(i,j) = orig_M[i*3+j];
>
> for (int i = 0; i < ROWS_A; i++)
> for (int j = 0; j < COLS_A; j++)
> A2(i,j) = orig_A[i*3+j];
>
> Array<double, 2> B2(rA, cM);
> assert(A2.columns() == M2.rows());
>
> firstIndex i;
> secondIndex j;
> thirdIndex k;
>
> t_start = clock();
>
> B2 = sum(A2(i,k) * M2(k,j), k);
>
> t_stop = clock();
> cout << "blitz array: " << t_stop - t_start << endl;
>
> // end blitz array */
>
> return 0;
> }
>
>
> _______________________________________________
> Blitz-support mailing list
> Blitz-support_at_[hidden]
> http://www.oonumerics.org/mailman/listinfo.cgi/blitz-support
>
>


// blitz_test.cpp

#include <blitz/array.h>
#include <ctime>
#include <iostream>
using namespace std;

int main()
{
  static const int ROWS_A = 2000000;
  static const int COLS_A = 3;
  static const int COLS_M = 3;
  static const int ROWS_M = COLS_A;
  clock_t t_start, t_stop;

  double* orig_M = new double[ROWS_M*COLS_M];
  double* orig_A = new double[ROWS_A*COLS_A];

  for (int i = 0; i < ROWS_M*COLS_M; i++)
    orig_M[i] = 20.0 * random()/(RAND_MAX+1.0);
  for (int i = 0; i < ROWS_A*COLS_A; i++)
    orig_A[i] = random()/(RAND_MAX+1.0);

//* start plain array

  double* M1 = new double[ROWS_M*COLS_M];
  double* A1 = new double[ROWS_A*COLS_A];

  for (int i = 0; i < ROWS_M; i++)
    for (int j = 0; j < COLS_M; j++)
      M1[i*COLS_M+j] = orig_M[i*COLS_M+j];

  for (int i = 0; i < ROWS_A; i++)
    for (int j = 0; j < COLS_A; j++)
      A1[i*COLS_A+j] = orig_A[i*COLS_A+j];

  double* B1 = new double[ROWS_A*COLS_M];
  assert(COLS_A == ROWS_M);

  t_start = clock();

  for (int i = 0; i < ROWS_A; i++)
    for (int j = 0; j < COLS_M; j++) {
      B1[i*COLS_M+j] = 0.0;
      for (int k = 0; k < COLS_A; k++)
        B1[i*COLS_M+j] += A1[i*COLS_A+k] * M1[k*COLS_M+j];
    }

  t_stop = clock();
  cout << "plain array : " << t_stop - t_start << endl;
  
  t_start = clock();

  for (int i = 0; i < ROWS_A; ++i)
    for (int j = 0; j < COLS_M; ++j) {
      double b1_loc = 0.0;
      for (int k = 0; k < COLS_A; ++k)
        b1_loc += A1[i*COLS_A+k] * M1[k*COLS_M+j];
      B1[i*COLS_M+j] = b1_loc ;
    }

  t_stop = clock();
  cout << "plain array 2 : " << t_stop - t_start << endl;
  delete[] M1, A1, B1;

// end plain array */

//* start blitz array

  using namespace blitz;
  
  Range rM(0, ROWS_M-1), cM(0, COLS_M-1);
  Range rA(0, ROWS_A-1), cA(0, COLS_A-1);
  Array<double, 2> M2(rM, cM), A2(rA, cA);

  for (int i = 0; i < ROWS_M; i++)
    for (int j = 0; j < COLS_M; j++)
      M2(i,j) = orig_M[i*3+j];

  for (int i = 0; i < ROWS_A; i++)
    for (int j = 0; j < COLS_A; j++)
      A2(i,j) = orig_A[i*3+j];

  Array<double, 2> B2(rA, cM);
  assert(A2.columns() == M2.rows());

  firstIndex i;
  secondIndex j;
  thirdIndex k;

  t_start = clock();

  B2 = sum(A2(i,k) * M2(k,j), k);

  t_stop = clock();
  cout << "blitz array : " << t_stop - t_start << " sum " << sum(B2) << endl;

  
  t_start = clock();

  for (int i = 0; i < ROWS_A; ++i)
    for (int j = 0; j < COLS_M; ++j)
      {
        double B1_loc = 0. ;
        for (int k = 0; k < COLS_A; ++k)
          B1_loc += A2(i,k) * M2(k,j);
        B2(i,j) = B1_loc;
    }

  t_stop = clock();
  cout << "blitz array 2 : " << t_stop - t_start << " sum " << sum(B2) <<endl;

// end blitz array */

  return 0;
}