! Tuned F90 version NCCS = 19
! - Stencil put in subroutine, called 3 times to cycle arrays
! - Rely on compiler to produce an efficient tiling

! Driver: sets up the four N x N fields, advances the stencil in groups of
! three calls (rotating the roles of P1/P2/P3), and returns one sample value.
!
! Arguments:
!   N      (in)  - extent of each dimension of the square grids
!   niters (in)  - requested number of stencil applications; the loop always
!                  performs whole groups of three, so a value that is not a
!                  multiple of 3 is rounded up and a warning is printed
!   check  (out) - P1(N/2, 7*N/8) after the final group of three calls
SUBROUTINE echo_f90_tuned(N, niters, check)
  IMPLICIT NONE
  INTEGER, INTENT( IN ) :: N, niters
  REAL, INTENT( OUT ) :: check
  ! Work arrays are allocated explicitly instead of being automatic arrays:
  ! four N*N automatic arrays can exceed the stack limit for large grids.
  REAL, DIMENSION (:,:), ALLOCATABLE :: P1, P2, P3, c
  INTEGER :: iter

  ALLOCATE(P1(N,N), P2(N,N), P3(N,N), c(N,N))

  ! External routine (not defined in this file); presumably fills c, P1, P2
  ! and P3 with the initial state -- confirm against its definition.
  CALL echo_f90_tuned_setup(c, P1, P2, P3, N)

  IF (MODULO(niters, 3) > 0) THEN
    PRINT *, 'niters should be divisible by 3, results will be off'
  ENDIF

  ! Each call writes its fourth argument from the second and third ones, so
  ! rotating the arguments cycles the arrays without copying any data.
  DO iter = 1, niters, 3
    CALL stencil_f90(c, P1, P2, P3, N)
    CALL stencil_f90(c, P2, P3, P1, N)
    CALL stencil_f90(c, P3, P1, P2, N)
  END DO

  check = P1(N/2,7*N/8)

  DEALLOCATE(P1, P2, P3, c)
  RETURN
END SUBROUTINE echo_f90_tuned

! One application of the five-point stencil on the interior of the grid:
!
!   P3(i,j) = (2 - 4*c(i,j)) * P2(i,j)
!           + c(i,j) * (P2(i-1,j) + P2(i+1,j) + P2(i,j-1) + P2(i,j+1))
!           - P1(i,j)                       for 2 <= i,j <= N-1
!
! Arguments:
!   c  (in)    - per-point coefficient
!   P1 (in)    - field subtracted in the update
!   P2 (in)    - field whose centre value and four neighbours are combined
!   P3 (inout) - receives the result on the interior; its boundary rows and
!                columns are left unchanged
!   N  (in)    - extent of each dimension of the square grids
! NOTE(review): the formula has the shape of a leapfrog time step with P1/P2
! as the two previous levels -- confirm against the project documentation.
SUBROUTINE stencil_f90(c, P1, P2, P3, N)
  IMPLICIT NONE
  INTEGER, INTENT( IN ) :: N
  ! c, P1 and P2 are only read here, so they are INTENT(IN).
  REAL, DIMENSION(N,N), INTENT( IN ) :: c, P1, P2
  REAL, DIMENSION(N,N), INTENT( INOUT ) :: P3

  ! Whole-array form; the order of operations matches the formula above.
  P3(2:N-1,2:N-1) = (2-4*c(2:N-1,2:N-1)) * P2(2:N-1,2:N-1) &
       + c(2:N-1,2:N-1)*(P2(1:N-2,2:N-1) + P2(3:N,2:N-1) &
       + P2(2:N-1,1:N-2) + P2(2:N-1,3:N)) - P1(2:N-1,2:N-1)
  RETURN
END SUBROUTINE stencil_f90