il y a 6 ans · d80b6a89a3
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@
 
				 ifeq ($(PLATFORM),SCC)
			
 
				 	CFLAGS = -Wall -g
			
 
				 	SHELL=sh
			
 
				-	RCCEROOT=../../bRCCE_V2.0
			
 
				+	RCCEROOT=./bRCCE_V2.0
			
 
				 	include $(RCCEROOT)/common/symbols
			
 
				 	PLATFORM_INCLUDES = $(RCCEINCLUDE)/RCCE.h
			
 
				 	MY_FLAGS += -DPLAT_SCC
			
--- a/RCCE_V2.0/.svn/all-wcprops
+++ b/RCCE_V2.0/.svn/all-wcprops
@@ -0,0 +1,47 @@
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 43
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0
			
 
				+END
			
 
				+run_stress_test
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 59
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/run_stress_test
			
 
				+END
			
 
				+COPYING
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 51
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/COPYING
			
 
				+END
			
 
				+sourcing
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 52
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/sourcing
			
 
				+END
			
 
				+Makefile
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 52
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/Makefile
			
 
				+END
			
 
				+README
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 50
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/README
			
 
				+END
			
 
				+build_stress_test
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 61
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/build_stress_test
			
 
				+END
			
 
				+configure
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 53
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/configure
			
 
				+END
			
--- a/RCCE_V2.0/.svn/entries
+++ b/RCCE_V2.0/.svn/entries
@@ -0,0 +1,290 @@
 
				+10
			
 
				+
			
 
				+dir
			
 
				+313
			
 
				+http://marcbug.scc-dc.com/svn/repository/tags/RCCE_V2.0
			
 
				+http://marcbug.scc-dc.com/svn/repository
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-01-10T18:47:23.474723Z
			
 
				+297
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+c924d837-3317-4ba4-8fbd-5f2da8699d51
			
 
				+
			
 
				+COPYING
			
 
				+file
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-10-27T13:42:44.532598Z
			
 
				+cfbe8de91e3af34fbef42d72d9634772
			
 
				+2010-06-25T23:28:47.346002Z
			
 
				+7
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+632
			
 
				+
			
 
				+bin
			
 
				+dir
			
 
				+
			
 
				+man
			
 
				+dir
			
 
				+
			
 
				+Makefile
			
 
				+file
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-10-27T13:42:44.532598Z
			
 
				+6a9f5ce78890306001f18338b1c92b5b
			
 
				+2011-08-03T21:54:30.632236Z
			
 
				+242
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+4971
			
 
				+
			
 
				+configure
			
 
				+file
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-10-27T13:42:44.532598Z
			
 
				+b1ee980b729ce38a0efa700a2c11d334
			
 
				+2012-01-07T00:22:43.621517Z
			
 
				+295
			
 
				+tekubasx
			
 
				+has-props
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+3167
			
 
				+
			
 
				+include
			
 
				+dir
			
 
				+
			
 
				+src
			
 
				+dir
			
 
				+
			
 
				+common
			
 
				+dir
			
 
				+
			
 
				+sourcing
			
 
				+file
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-10-27T13:42:44.532598Z
			
 
				+3a02a4f19dbba66e5d8349b0998a5dc9
			
 
				+2010-08-27T16:04:17.032086Z
			
 
				+45
			
 
				+tekubasx
			
 
				+has-props
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+84
			
 
				+
			
 
				+README
			
 
				+file
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-10-27T13:42:44.532598Z
			
 
				+3a4fad3d2b9fdf74aff8d883184cfb90
			
 
				+2012-01-07T00:47:20.951110Z
			
 
				+296
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+6682
			
 
				+
			
 
				+utils
			
 
				+dir
			
 
				+
			
 
				+hosts
			
 
				+dir
			
 
				+
			
 
				+build_stress_test
			
 
				+file
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-10-27T13:42:44.532598Z
			
 
				+5c2f69e9213cc23350557c9463ac070f
			
 
				+2010-06-25T23:28:47.346002Z
			
 
				+7
			
 
				+tekubasx
			
 
				+has-props
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2649
			
 
				+
			
 
				+apps
			
 
				+dir
			
 
				+
			
 
				+run_stress_test
			
 
				+file
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-10-27T13:42:44.532598Z
			
 
				+b1a6ad1e7238f3ed92ea363f4070d1f7
			
 
				+2010-06-25T23:28:47.346002Z
			
 
				+7
			
 
				+tekubasx
			
 
				+has-props
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+4605
			
 
				+
			
--- a/RCCE_V2.0/.svn/prop-base/build_stress_test.svn-base
+++ b/RCCE_V2.0/.svn/prop-base/build_stress_test.svn-base
@@ -0,0 +1,5 @@
 
				+K 14
			
 
				+svn:executable
			
 
				+V 0
			
 
				+
			
 
				+END
			
--- a/RCCE_V2.0/.svn/prop-base/configure.svn-base
+++ b/RCCE_V2.0/.svn/prop-base/configure.svn-base
@@ -0,0 +1,5 @@
 
				+K 14
			
 
				+svn:executable
			
 
				+V 0
			
 
				+
			
 
				+END
			
--- a/RCCE_V2.0/.svn/prop-base/run_stress_test.svn-base
+++ b/RCCE_V2.0/.svn/prop-base/run_stress_test.svn-base
@@ -0,0 +1,5 @@
 
				+K 14
			
 
				+svn:executable
			
 
				+V 0
			
 
				+
			
 
				+END
			
--- a/RCCE_V2.0/.svn/prop-base/sourcing.svn-base
+++ b/RCCE_V2.0/.svn/prop-base/sourcing.svn-base
@@ -0,0 +1,5 @@
 
				+K 14
			
 
				+svn:executable
			
 
				+V 1
			
 
				+*
			
 
				+END
			
--- a/RCCE_V2.0/.svn/text-base/COPYING.svn-base
+++ b/RCCE_V2.0/.svn/text-base/COPYING.svn-base
@@ -0,0 +1,15 @@
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
--- a/RCCE_V2.0/.svn/text-base/Makefile.svn-base
+++ b/RCCE_V2.0/.svn/text-base/Makefile.svn-base
@@ -0,0 +1,116 @@
 
				+include common/symbols
			
 
				+
			
 
				+ifeq ($(OMP_EMULATOR),0)
			
 
				+  PLATFORMOBJS=SCC_API.o   
			
 
				+else
			
 
				+  PLATFORMOBJS=RCCE_emulator_driver.o
			
 
				+endif
			
 
				+
			
 
				+ifeq ($(PWRMGMT),1)
			
 
				+  POWEROBJS=RCCE_power_management.o
			
 
				+endif
			
 
				+
			
 
				+ARCHIVEOBJS= RCCE_admin.o RCCE_comm.o   RCCE_malloc.o RCCE_qsort.o RCCE_synch.o RCCE_flags.o  \
			
 
				+             RCCE_send.o  RCCE_recv.o   RCCE_debug.o  RCCE_get.o   RCCE_put.o   RCCE_reduce.o \
			
 
				+             RCCE_bcast.o RCCE_shmalloc.o RCCE_DCMflush.o $(PLATFORMOBJS) $(POWEROBJS)
			
 
				+
			
 
				+ifeq ($(OMP_EMULATOR),0)
			
 
				+	ARCHIVEOBJS += RCCE_memcpy.o
			
 
				+endif
			
 
				+
			
 
				+$(ARCHIVE): $(ARCHIVEOBJS)
			
 
				+	@echo Archive name = $(ARCHIVE) 
			
 
				+	ar -r $(ARCHIVE) $(ARCHIVEOBJS) 
			
 
				+	rm -f *.o
			
 
				+
			
 
				+usage:
			
 
				+	@echo "         make [OMP_EMULATOR=0] [PWRMGMT=1] [API=gory]  [SINGLEBITFLAGS=1]"
			
 
				+	@echo "         make [clean] [veryclean]" 
			
 
				+	@echo "default: make  OMP_EMULATOR=1   PWRMGMT=0   API=nongory SINGLEBITFLAGS=0"
			
 
				+
			
 
				+RCCE_admin.o: $(RCCE_LIB_SRC)/RCCE_admin.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h \
			
 
				+        $(RCCEINCLUDE)/RCCE_lib_pwr.h
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_admin.c  $(RCCE_FLAGS) 
			
 
				+
			
 
				+RCCE_power_management.o: $(RCCE_LIB_SRC)/RCCE_power_management.c $(RCCEINCLUDE)/RCCE.h \
			
 
				+         $(RCCEINCLUDE)/RCCE_lib.h $(RCCEINCLUDE)/SCC_API.h $(RCCEINCLUDE)/RCCE_lib_pwr.h
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_power_management.c  $(RCCE_FLAGS) 
			
 
				+
			
 
				+RCCE_debug.o: $(RCCE_LIB_SRC)/RCCE_debug.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h \
			
 
				+         $(RCCEINCLUDE)/RCCE_debug.h
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_debug.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_comm.o: $(RCCE_LIB_SRC)/RCCE_comm.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_comm.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_send.o: $(RCCE_LIB_SRC)/RCCE_send.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_send.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_recv.o: $(RCCE_LIB_SRC)/RCCE_recv.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_recv.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_memcpy.o: $(RCCE_LIB_SRC)/RCCE_memcpy.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_memcpy.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_get.o: $(RCCE_LIB_SRC)/RCCE_get.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_get.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_put.o: $(RCCE_LIB_SRC)/RCCE_put.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_put.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_reduce.o: $(RCCE_LIB_SRC)/RCCE_reduce.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_reduce.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_bcast.o: $(RCCE_LIB_SRC)/RCCE_bcast.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_bcast.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_malloc.o: $(RCCE_LIB_SRC)/RCCE_malloc.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_malloc.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_shmalloc.o: $(RCCE_LIB_SRC)/RCCE_shmalloc.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_shmalloc.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_qsort.o: $(RCCE_LIB_SRC)/RCCE_qsort.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_qsort.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_synch.o: $(RCCE_LIB_SRC)/RCCE_synch.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_synch.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_flags.o: $(RCCE_LIB_SRC)/RCCE_flags.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_flags.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_emulator_driver.o: $(RCCE_LIB_SRC)/RCCE_emulator_driver.c $(RCCEINCLUDE)/RCCE.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_emulator_driver.c $(RCCE_FLAGS) 
			
 
				+
			
 
				+SCC_API.o: $(RCCE_LIB_SRC)/SCC_API.c $(RCCEINCLUDE)/SCC_API.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/SCC_API.c
			
 
				+
			
 
				+RCCE_DCMflush.o: $(RCCE_LIB_SRC)/RCCE_DCMflush.c $(RCCEINCLUDE)/SCC_API.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_DCMflush.c
			
 
				+
			
 
				+mpb.o: $(RCCE_LIB_SRC)/mpb.c $(RCCEINCLUDE)/SCC_API.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/mpb.c
			
 
				+
			
 
				+mpb: mpb.o SCC_API.o # link the mpb utility into the per-platform bin directory under the RCCE root
			
 
				+	$(CCOMPILE) $(CFLAGS) mpb.o SCC_API.o -o $(RCCEROOT)/bin/$(SUBDIR)/mpb
			
 
				+	rm -f *.o
			
 
				+
			
 
				+clean: # remove the archive, its objects, the mpb object/binary and all built libraries
			
 
				+	rm -f $(ARCHIVE) $(ARCHIVEOBJS)
			
 
				+	rm -f mpb.o $(RCCEROOT)/bin/$(SUBDIR)/mpb
			
 
				+	rm -f bin/*/*.a
			
 
				+
			
 
				+veryclean: # like clean, plus cleans every bundled app and removes the files generated by configure
			
 
				+	rm -f $(ARCHIVE) $(ARCHIVEOBJS)
			
 
				+	rm -f mpb.o $(RCCEROOT)/bin/$(SUBDIR)/mpb
			
 
				+	rm -f bin/*/*.a
			
 
				+	cd apps/SHIFT;    make clean; cd -
			
 
				+	cd apps/STENCIL;  make clean; cd -
			
 
				+	cd apps/NPB;      make clean; cd -
			
 
				+	cd apps/PINGPONG; make clean; cd -
			
 
				+	cd apps/XHPL;     make veryclean; cd -
			
 
				+	cd apps/SHARE;    make clean; cd -
			
 
				+	rm -f common/symbols rccerun makeall
			
 
				+	@echo --------------------------------------------------------------------
			
 
				+	@echo RUN \"configure\" SCRIPT \(AGAIN\) BEFORE MAKING EXECUTABLES + LIBRARIES
			
 
				+	@echo --------------------------------------------------------------------
			
--- a/RCCE_V2.0/.svn/text-base/README.svn-base
+++ b/RCCE_V2.0/.svn/text-base/README.svn-base
@@ -0,0 +1,153 @@
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//
			
 
				+
			
 
				+Welcome to RCCE, a communication environment for the SCC processor 
			
 
				+------------------------------------------------------------------
			
 
				+
			
 
				+RCCE is designed to run on a variety of platforms including:
			
 
				+
			
 
				+  * Baremetal on the SCC chip, 
			
 
				+  * Linux on the SCC chip, 
			
 
				+  * A functional emulator running on top of OpenMP.  
			
 
				+
			
 
				+============ test line ============
			
 
				+This particular release has been validated only for Linux and the 
			
 
				+OpenMP emulator (a baremetal build option is available, but has
			
 
				+not been tested). It may seem a bit cumbersome to work with, but 
			
 
				+that's so we can replicate the "features" of the SCC chip ... i.e. 
			
 
				+once a program runs on the emulator, it's likely to work on real 
			
 
				+hardware.
			
 
				+
			
 
				+There are several versions of the RCCE library that can be 
			
 
				+built with this release.  They expose different options ...
			
 
				+
			
 
				+   * The "gory" interface ... this is the low level interface. It 
			
 
				+     makes the programmer responsible for declaring and managing
			
 
				+     synchronization flags and for managing the on-chip message 
			
 
				+     passing buffer. This mode gives access to the low level 
			
 
				+     get/put routines,  as well as to the higher level two-sided 
			
 
				+     send/receive interface.
			
 
				+
			
 
				+   * The "nongory" interface ... a higher level interface that
			
 
				+     hides the particulars of the message passing buffers and 
			
 
				+     inter-core synchronization from the programmer, including the 
			
 
				+     management of synchronization flags. This interface does not 
			
 
				+     give access to the low level put/get routines.
			
 
				+
			
 
				+   * Big Flags ... each flag used to coordinate interaction 
			
 
				+     between units of execution (UE) takes up a byte in a single cacheline. 
			
 
				+     This has lower latency but wastes memory.
			
 
				+
			
 
				+   * Small flags ... flags are stored in a single bit; many are
			
 
				+     packed into a single cache line. A slight hit on latency but 
			
 
				+     consumes less message passing buffer memory.
			
 
				+
			
 
				+   * With or without software controlled power management. POWER
			
 
				+     MANAGEMENT IS AN EXPERIMENTAL FEATURE THAT HAS NOT BEEN TESTED 
			
 
				+     AS THOROUGHLY AS THE REST OF THE LIBRARY. FOLLOW THE SPECIAL
			
 
				+     INSTRUCTIONS BELOW TO CONFIGURE THE "makeall" SCRIPT TO BUILD 
			
 
				+     VERSIONS OF THE LIBRARY THAT INCLUDE THE POWER MANAGEMENT API.
			
 
				+
			
 
				+
			
 
				+You can build all versions of the library supported with this 
			
 
				+release, as follows:
			
 
				+
			
 
				+1. Type "./configure <PLATFORM>". This creates file common/symbols 
			
 
				+   from file common/symbols.in, inserting the proper root of the 
			
 
				+   directory tree, and also inserting the proper platform (SCC_LINUX,
			
 
				+   SCC_BAREMETAL, or emulator). Any existing file common/symbols 
			
 
				+   will be overwritten, so do not update that file by hand. Instead,
			
 
				+   specify details of your build environment in common/symbols.in.
			
 
				+   The configure utility also specializes the rccerun command (see 
			
 
				+   below) for the target platform. You may need to make the configure
			
 
				+   script executable (type "chmod u+x configure").
			
 
				+   To enable RCCE's power management API, you must specify the string
			
 
				+   "ADD_POWER_API" as the second parameter on the command line when 
			
 
				+   you execute the configure script. Because this is an experimental 
			
 
				+   feature, it is not built by default. See above.
			
 
				+
			
 
				+2. Type "./makeall" to build all libraries. Alternatively you 
			
 
				+   can build individual libraries by calling make directly.  
			
 
				+   Type "make usage" to discover the libraries you can build.
			
 
				+
			
 
				+The libraries generated by this procedure will be put in the directory
			
 
				+
			
 
				+    bin/<PLATFORM>
			
 
				+
			
 
				+An easy way to test correct operation of the platform is to build and
			
 
				+run a prepackaged RCCE stress test after building the RCCE library:
			
 
				+   "./build_stress_test; ./run_stress_test <size>"
			
 
				+where size is -S (small), -M (medium), or -L (large).
			
 
				+
			
 
				+A number of applications are included with this release in the
			
 
				+"apps" directory.  These include:  
			
 
				+
			
 
				+  * PINGPONG:  bounces messages between a pair of UEs
			
 
				+  * SHIFT:     passes messages around a logical ring of UEs
			
 
				+  * STENCIL:   solves a simple PDE with a basic stencil code
			
 
				+  * SHARE:     tests the off-chip shared memory access
			
 
				+  * NPB:       NAS Parallel Benchmarks, LU and BT
			
 
				+  * XHPL:      the Linpack benchmark
			
 
				+
			
 
				+To build an application,  go to the corresponding subdirectory of "apps" 
			
 
				+and type "make".   It will return a list of options for building
			
 
				+versions of an application. It may be necessary to edit the Makefile
			
 
				+in an application directory if parts of the original RCCE code tree 
			
 
				+got moved with respect to each other.
			
 
				+
			
 
				+We suggest that you start with PINGPONG, SHIFT, SHARE, and STENCIL and 
			
 
				+save the more complex NPB and XHPL for later. The STENCIL directory 
			
 
				+contains a few simple variations of the base code that exercise RCCE's 
			
 
				+experimental power management API.
			
 
				+See the apps/XHPL directory for instructions on how to build and run
			
 
				+Linpack.
			
 
				+
			
 
				+To run an application, you must use the rccerun command.  This command
			
 
				+is used to launch Linux jobs on SCC or on the emulator. To run application 
			
 
				+APP with P cores, type
			
 
				+
			
 
				+"rccerun -nue P -f HOSTFILE APP [application parameters]"
			
 
				+
			
 
				+where HOSTFILE contains the list of physical core IDs to be used. 
			
 
				+By default, the host file "./hosts/rc.hosts" should be used. 
			
 
				+You can see an example of the use of rccerun in the run_stencil and 
			
 
				+run_stencil_synch shell scripts in the "apps/STENCIL" directory. 
			
 
				+
			
 
				+If my_script is a shell script that contains the actual RCCE executable 
			
 
				+RCCE_X (which may take application parameters), make sure to execute it as 
			
 
				+follows inside the script: "/path_to_RCCE_X/RCCE_X $@". This is necessary 
			
 
				+so that all the parameters to the program, including those added by
			
 
				+rccerun, are supplied to the executable.
			
 
				+
			
 
				+Example: 
			
 
				+Shell script my_script contains executable RCCE_X that expects two 
			
 
				+parameters, n and m. Write the script as:
			
 
				+----------start of my_script---------
			
 
				+line 1
			
 
				+line 2
			
 
				+line ...
			
 
				+./RCCE_X $@
			
 
				+line ...
			
 
				+----------end of my_script-----------
			
 
				+To run the code on P cores of the SCC, type
			
 
				+
			
 
				+"rccerun -nue P -f HOSTFILE my_script m n"
			
 
				+
			
 
				+MANPAGES
			
 
				+This release of RCCE has manpages. To access those manpages, add a path to 
			
 
				+your MANPATH as follows:
			
 
				+export MANPATH="<path to where you installed RCCE>/man:${MANPATH}"
			
--- a/RCCE_V2.0/.svn/text-base/build_stress_test.svn-base
+++ b/RCCE_V2.0/.svn/text-base/build_stress_test.svn-base
@@ -0,0 +1,79 @@
 
				+#!/bin/bash
			
 
				+#  
			
 
				+#  Copyright 2010 Intel Corporation
			
 
				+#  
			
 
				+#     Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+#     you may not use this file except in compliance with the License.
			
 
				+#     You may obtain a copy of the License at
			
 
				+#  
			
 
				+#         http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#  
			
 
				+#     Unless required by applicable law or agreed to in writing, software
			
 
				+#     distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+#     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+#     See the License for the specific language governing permissions and
			
 
				+#     limitations under the License.
			
 
				+#  
			
 
				+NUMPARS=$# 
			
 
				+PWRMGMT=0
			
 
				+
			
 
				+SHARED=/shared/`whoami`
			
 
				+BIN_STRESS=$SHARED/bin_stress
			
 
				+PAR=1 
			
 
				+while [ $PAR -le $NUMPARS ]; do 
			
 
				+  eval OPT=\$$PAR 
			
 
				+  case $OPT in 
			
 
				+    -PWRMGMT  ) PWRMGMT=1                           ;; 
			
 
				+    -CLEAN    ) rm -rf $BIN_STRESS           ; exit ;;
			
 
				+     *        ) echo Error, wrong option $OPT; exit ;;
			
 
				+  esac   
			
 
				+  PAR=`expr $PAR + 1`                                                    
			
 
				+done 
			
 
				+
			
 
				+if [ ! \( -d $SHARED \) ]; then 
			
 
				+  echo Creating directory $SHARED
			
 
				+  mkdir $SHARED 
			
 
				+fi
			
 
				+if [ ! \( -d $BIN_STRESS \) ]; then 
			
 
				+  echo Creating directory $BIN_STRESS
			
 
				+  mkdir $BIN_STRESS 
			
 
				+fi
			
 
				+
			
 
				+cd apps/STENCIL
			
 
				+  make stencil_synch;                  mv stencil_synch $BIN_STRESS/stencil
			
 
				+  make SINGLEBITFLAGS=1 stencil_synch; mv stencil_synch $BIN_STRESS/stencil_1b
			
 
				+cd -
			
 
				+
			
 
				+cd apps/PINGPONG; 
			
 
				+  make pingpong; mv pingpong $BIN_STRESS
			
 
				+cd -
			
 
				+
			
 
				+cd apps/NPB
			
 
				+  make bt CLASS=S NPROCS=4;                   mv BT/bt.S.4  $BIN_STRESS/bt.S.4
			
 
				+  make bt CLASS=W NPROCS=16;                  mv BT/bt.W.16 $BIN_STRESS/bt.W.16
			
 
				+  make bt CLASS=W NPROCS=36;                  mv BT/bt.W.36 $BIN_STRESS/bt.W.36
			
 
				+  make bt SINGLEBITFLAGS=1 CLASS=S NPROCS=4;  mv BT/bt.S.4  $BIN_STRESS/bt.S.4_1b
			
 
				+  make bt SINGLEBITFLAGS=1 CLASS=W NPROCS=16; mv BT/bt.W.16 $BIN_STRESS/bt.W.16_1b
			
 
				+  make bt SINGLEBITFLAGS=1 CLASS=W NPROCS=36; mv BT/bt.W.36 $BIN_STRESS/bt.W.36_1b
			
 
				+cd -
			
 
				+
			
 
				+if [ $PWRMGMT -eq 1 ]; then
			
 
				+  cd apps/STENCIL
			
 
				+     make PWRMGMT=1 pstencil;    mv pstencil    $BIN_STRESS
			
 
				+     make PWRMGMT=1 power_reset; mv power_reset $BIN_STRESS
			
 
				+     make PWRMGMT=1 Fdiv;        mv Fdiv        $BIN_STRESS
			
 
				+     make PWRMGMT=1 FV;          mv FV          $BIN_STRESS
			
 
				+  cd -
			
 
				+fi
			
 
				+
			
 
				+cp rccerun $BIN_STRESS
			
 
				+cp hosts/rc.hosts $BIN_STRESS/allhosts
			
 
				+cat hosts/rc.hosts | sort -r > $BIN_STRESS/allhosts_reverse
			
 
				+echo 00 >  $BIN_STRESS/2hosts_1tile
			
 
				+echo 01 >> $BIN_STRESS/2hosts_1tile
			
 
				+echo 00 >  $BIN_STRESS/2hosts_nbr_tiles
			
 
				+echo 02 >> $BIN_STRESS/2hosts_nbr_tiles
			
 
				+echo 00 >  $BIN_STRESS/2hosts_faraway_tiles
			
 
				+echo 47 >> $BIN_STRESS/2hosts_faraway_tiles
			
 
				+
			
 
				+
			
--- a/RCCE_V2.0/.svn/text-base/configure.svn-base
+++ b/RCCE_V2.0/.svn/text-base/configure.svn-base
@@ -0,0 +1,91 @@
 
				+#!/bin/bash
			
 
				+#
			
 
				+#  Copyright 2010 Intel Corporation
			
 
				+#  
			
 
				+#     Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+#     you may not use this file except in compliance with the License.
			
 
				+#     You may obtain a copy of the License at
			
 
				+#  
			
 
				+#         http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#  
			
 
				+#     Unless required by applicable law or agreed to in writing, software
			
 
				+#     distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+#     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+#     See the License for the specific language governing permissions and
			
 
				+#     limitations under the License.
			
 
				+#
			
 
				+PID=$$
			
 
				+if [ $# -lt 1 ]; then
			
 
				+  echo "Usage: $0 emulator"
			
 
				+  echo "       $0 SCC_LINUX"
			
 
				+  echo "       $0 SCC_LINUX ADD_POWER_API"
			
 
				+  echo "       $0 SCC_BAREMETAL"
			
 
				+  echo "See README for power management options"
			
 
				+  exit
			
 
				+fi
			
 
				+if [ "$1" = "SCC_LINUX" ] || [ "$1" = "SCC_BAREMETAL" ]; then
			
 
				+  if [ "$1" = "SCC_LINUX" ]; then BAREMETAL=0; else BAREMETAL=1; fi
			
 
				+  OMP_EMULATOR=0
			
 
				+  MAKE_MPB="make mpb"
			
 
				+else
			
 
				+  MAKE_MPB=""
			
 
				+  PSSH_MPB_C=""
			
 
				+  PSSH_MPB_CL=""
			
 
				+  if [ "$1" = "emulator" ]; then
			
 
				+    OMP_EMULATOR=1
			
 
				+  else
			
 
				+    echo Incorrect platform: $1
			
 
				+    exit 1
			
 
				+  fi
			
 
				+fi
			
 
				+
			
 
				+POWERPARS="0"
			
 
				+if [ $# -eq 2 ] && [ "$2" = "ADD_POWER_API" ]; then
			
 
				+  POWERPARS="0 1"
			
 
				+fi
			
 
				+
			
 
				+ROOT=`pwd`
			
 
				+COMFILE=common/symbols
			
 
				+echo "#########################################################" >  $COMFILE
			
 
				+echo "# DO NOT EDIT BY HAND!! This file gets overwritten each #" >> $COMFILE
			
 
				+echo "# time the configure script is run. Insert any changes  #" >> $COMFILE
			
 
				+echo "# in file common/symbols.in instead.                    #" >> $COMFILE
			
 
				+echo "#########################################################" >> $COMFILE
			
 
				+echo ""                                                          >> $COMFILE
			
 
				+
			
 
				+#note: must use colon for sed separator; slash conflicts with symbol(s) in path
			
 
				+cat $COMFILE.in | sed "s:_INSERT_BMVAL_INSERT_:${BAREMETAL}:" | \
			
 
				+                  sed "s:_INSERT_ROOTDIR_INSERT_:${ROOT}:" | \
			
 
				+                  sed "s:_INSERT_EMVAL_INSERT_:${OMP_EMULATOR}:" >> $COMFILE
			
 
				+
			
 
				+RUNFILE=utils/rccerun
			
 
				+cat $RUNFILE.in | sed "s:_INSERT_ROOTDIR_INSERT_:${ROOT}:" | \
			
 
				+                  sed "s:_INSERT_BINDIR_INSERT_:$1:"       | \
			
 
				+                  sed "s:_INSERT_EMVAL_INSERT_:${OMP_EMULATOR}:" > $RUNFILE
			
 
				+chmod u+x $RUNFILE
			
 
				+mv $RUNFILE .
			
 
				+
			
 
				+MAKEALL=utils/makeall
			
 
				+cat $MAKEALL.in | sed "s:_INSERT_POWERPARS_INSERT_:${POWERPARS}:" | \
			
 
				+                  sed "s:_INSERT_MAKE_MPB_:${MAKE_MPB}:" > $MAKEALL
			
 
				+chmod u+x $MAKEALL
			
 
				+mv $MAKEALL .
			
 
				+
			
 
				+#create scripts for killing processes on the cores and the MCPC containing a user specified string
			
 
				+#note: this only makes sense on the SCC platform itself, not the emulator
			
 
				+if [ "$OMP_EMULATOR" -eq 0 ]; then
			
 
				+  SHARED=/shared/`whoami`
			
 
				+  if [ ! \( -d $SHARED \) ]; then 
			
 
				+    echo Creating directory $SHARED
			
 
				+    mkdir $SHARED 
			
 
				+  fi
			
 
				+  
			
 
				+  KILLIT=utils/killit
			
 
				+  cat $KILLIT.in | sed "s:_INSERT_SHAREDDIR_INSERT_:${SHARED}:" > $KILLIT
			
 
				+  KILLCORE=utils/killcorePIDs
			
 
				+  cat $KILLCORE.in | sed "s:_INSERT_SHAREDDIR_INSERT_:${SHARED}:" > $KILLCORE
			
 
				+  chmod u+x $KILLIT $KILLCORE
			
 
				+  ALLHOSTS=utils/allhosts
			
 
				+  mv $KILLIT $KILLCORE $SHARED
			
 
				+  cp  $ALLHOSTS $SHARED
			
 
				+fi
			
--- a/RCCE_V2.0/.svn/text-base/run_stress_test.svn-base
+++ b/RCCE_V2.0/.svn/text-base/run_stress_test.svn-base
@@ -0,0 +1,127 @@
 
				+#!/bin/bash
			
 
				+#  
			
 
				+#  Copyright 2010 Intel Corporation
			
 
				+#  
			
 
				+#     Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+#     you may not use this file except in compliance with the License.
			
 
				+#     You may obtain a copy of the License at
			
 
				+#  
			
 
				+#         http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#  
			
 
				+#     Unless required by applicable law or agreed to in writing, software
			
 
				+#     distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+#     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+#     See the License for the specific language governing permissions and
			
 
				+#     limitations under the License.
			
 
				+#  
			
 
				+NUMPARS=$# 
			
 
				+PID=$$
			
 
				+LOG=`pwd`/log.$PID
			
 
				+SUM=`pwd`/summary.$PID
			
 
				+SCRATCH=.scratch.$PID
			
 
				+SIZE="UNKNOWN"
			
 
				+PWRMGMT=0
			
 
				+# change the tile clock frequency if using a tile clock divider other than 3
			
 
				+GHZ=0.533
			
 
				+
			
 
				+PAR=1 
			
 
				+while [ $PAR -le $NUMPARS ]; do 
			
 
				+  eval OPT=\$$PAR 
			
 
				+  case $OPT in 
			
 
				+    -S        ) if [ $SIZE = "UNKNOWN" ]; then SIZE=SMALL;  fi ;; 
			
 
				+    -M        ) if [ $SIZE = "UNKNOWN" ]; then SIZE=MEDIUM; fi ;; 
			
 
				+    -L        ) if [ $SIZE = "UNKNOWN" ]; then SIZE=LARGE;  fi ;; 
			
 
				+    -PWRMGMT  ) PWRMGMT=1     ;; 
			
 
				+     *        ) echo Error, wrong option $OPT | tee -a $LOG      | tee -a $SUM 
			
 
				+                exit ;;
			
 
				+  esac                                                       
			
 
				+  PAR=`expr $PAR + 1`
			
 
				+done 
			
 
				+
			
 
				+if [ $SIZE = "UNKNOWN" ]; then 
			
 
				+  echo ERROR: No size specified \(-S, -M, or -L\)  | tee -a $LOG | tee -a $SUM
			
 
				+  exit
			
 
				+else
			
 
				+  echo Executing RCCE stress test of size $SIZE    | tee -a $LOG | tee -a $SUM
			
 
				+fi
			
 
				+
			
 
				+BIN_STRESS=/shared/`whoami`/bin_stress
			
 
				+if [ ! \( -d $BIN_STRESS \) ]; then
			
 
				+  echo ERROR: No stress test directory  | tee -a $LOG | tee -a $SUM 
			
 
				+  echo Please create and populate it by invoking \"build_stress_test\" \
			
 
				+        | tee -a $LOG | tee -a $SUM
			
 
				+  exit
			
 
				+fi
			
 
				+
			
 
				+cd $BIN_STRESS
			
 
				+
			
 
				+for HXT in 1tile nbr_tiles faraway_tiles; do
			
 
				+  case $SIZE in
			
 
				+    SMALL  ) ITERS=10;;
			
 
				+    MEDIUM ) ITERS=1000;;
			
 
				+    LARGE  ) ITERS=100000;;
			
 
				+  esac
			
 
				+  echo ./rccerun -nue 2 -f 2hosts_$HXT -clock $GHZ pingpong $ITERS \
			
 
				+             | tee -a $LOG | tee -a $SUM
			
 
				+       ./rccerun -nue 2 -f 2hosts_$HXT -clock $GHZ pingpong $ITERS \
			
 
				+             | tee -a $LOG | tee  $SCRATCH 
			
 
				+  grep -i latency $SCRATCH >> $SUM 
			
 
				+done
			
 
				+
			
 
				+for EXT in "" "_1b"; do
			
 
				+  if [ "$EXT" = "_1b" ]; then
			
 
				+    echo Using single bit flags | tee -a $LOG | tee -a $SUM
			
 
				+  fi
			
 
				+  for HOSTS in allhosts allhosts_reverse; do
			
 
				+    case $SIZE in
			
 
				+      SMALL  ) BTCORES=4;  CLASS=S; STCORES=4;  STITERS=10;;
			
 
				+      MEDIUM ) BTCORES=16; CLASS=W; STCORES=16; STITERS=100;;
			
 
				+      LARGE  ) BTCORES=36; CLASS=W; STCORES=48; STITERS=1000;;
			
 
				+    esac
			
 
				+
			
 
				+    echo ./rccerun -nue $STCORES -f $HOSTS -clock $GHZ stencil$EXT $STITERS    \
			
 
				+               | tee -a $LOG | tee -a $SUM
			
 
				+         ./rccerun -nue $STCORES -f $HOSTS -clock $GHZ stencil$EXT $STITERS    \
			
 
				+               | tee -a $LOG | tee  $SCRATCH 
			
 
				+    grep Verification $SCRATCH | grep SUCCESSFUL >> $SUM 
			
 
				+    echo ./rccerun -nue $BTCORES -f $HOSTS -clock $GHZ bt.$CLASS.$BTCORES$EXT  \
			
 
				+               | tee -a $LOG | tee -a $SUM
			
 
				+         ./rccerun -nue $BTCORES -f $HOSTS -clock $GHZ bt.$CLASS.$BTCORES$EXT  \
			
 
				+               | tee -a $LOG | tee  $SCRATCH 
			
 
				+    grep Verification $SCRATCH | grep SUCCESSFUL >> $SUM 
			
 
				+  done
			
 
				+  if [ "$EXT" = "_1b" ]; then
			
 
				+    echo End using single bit flags | tee -a $LOG | tee -a $SUM
			
 
				+  fi
			
 
				+done
			
 
				+
			
 
				+if [ $PWRMGMT -eq 1 ]; then
			
 
				+  case $SIZE in
			
 
				+    SMALL  ) NC=1;;
			
 
				+    MEDIUM ) NC=8 ;;
			
 
				+    LARGE  ) NC=48 ;;
			
 
				+  esac
			
 
				+  echo ./rccerun -nue $NC -f allhosts Fdiv 4                        \
			
 
				+             | tee -a $LOG | tee -a $SUM
			
 
				+       ./rccerun -nue $NC -f allhosts Fdiv 4                        \
			
 
				+             | tee -a $LOG | tee  $SCRATCH 
			
 
				+  grep Verification $SCRATCH | grep SUCCESSFUL >> $SUM 
			
 
				+  echo ./rccerun -nue $NC -f allhosts FV 3                          \
			
 
				+             | tee -a $LOG | tee -a $SUM
			
 
				+       ./rccerun -nue $NC -f allhosts FV 3                          \
			
 
				+             | tee -a $LOG | tee  $SCRATCH 
			
 
				+  grep Verification $SCRATCH | grep SUCCESSFUL >> $SUM 
			
 
				+  echo ./rccerun -nue $NC -f allhosts power_reset                   \
			
 
				+             | tee -a $LOG | tee -a $SUM
			
 
				+       ./rccerun -nue $NC -f allhosts power_reset                   \
			
 
				+             | tee -a $LOG | tee  $SCRATCH 
			
 
				+  grep Verification $SCRATCH | grep SUCCESSFUL >> $SUM 
			
 
				+  echo ./rccerun -nue $NC -f allhosts pstencil                      \
			
 
				+             | tee -a $LOG | tee -a $SUM
			
 
				+       ./rccerun -nue $NC -f allhosts pstencil                      \
			
 
				+             | tee -a $LOG | tee  $SCRATCH 
			
 
				+  grep Verification $SCRATCH | grep SUCCESSFUL >> $SUM 
			
 
				+fi
			
 
				+
			
 
				+rm $SCRATCH
			
 
				+
			
--- a/RCCE_V2.0/.svn/text-base/sourcing.svn-base
+++ b/RCCE_V2.0/.svn/text-base/sourcing.svn-base
@@ -0,0 +1,2 @@
 
				+source /shared/icc-8.1.038/bin/iccvars.sh
			
 
				+source /shared/crosstool/crosstoolvars.sh
			
--- a/RCCE_V2.0/COPYING
+++ b/RCCE_V2.0/COPYING
@@ -0,0 +1,15 @@
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
--- a/RCCE_V2.0/Makefile
+++ b/RCCE_V2.0/Makefile
@@ -0,0 +1,116 @@
 
				+include common/symbols
			
 
				+
			
 
				+ifeq ($(OMP_EMULATOR),0)
			
 
				+  PLATFORMOBJS=SCC_API.o   
			
 
				+else
			
 
				+  PLATFORMOBJS=RCCE_emulator_driver.o
			
 
				+endif
			
 
				+
			
 
				+ifeq ($(PWRMGMT),1)
			
 
				+  POWEROBJS=RCCE_power_management.o
			
 
				+endif
			
 
				+
			
 
				+ARCHIVEOBJS= RCCE_admin.o RCCE_comm.o   RCCE_malloc.o RCCE_qsort.o RCCE_synch.o RCCE_flags.o  \
			
 
				+             RCCE_send.o  RCCE_recv.o   RCCE_debug.o  RCCE_get.o   RCCE_put.o   RCCE_reduce.o \
			
 
				+             RCCE_bcast.o RCCE_shmalloc.o RCCE_DCMflush.o $(PLATFORMOBJS) $(POWEROBJS)
			
 
				+
			
 
				+ifeq ($(OMP_EMULATOR),0)
			
 
				+	ARCHIVEOBJS += RCCE_memcpy.o
			
 
				+endif
			
 
				+
			
 
				+$(ARCHIVE): $(ARCHIVEOBJS)
			
 
				+	@echo Archive name = $(ARCHIVE) 
			
 
				+	ar -r $(ARCHIVE) $(ARCHIVEOBJS) 
			
 
				+	rm -f *.o
			
 
				+
			
 
				+usage:
			
 
				+	@echo "         make [OMP_EMULATOR=0] [PWRMGMT=1] [API=gory]  [SINGLEBITFLAGS=1]"
			
 
				+	@echo "         make [clean] [veryclean]" 
			
 
				+	@echo "default: make  OMP_EMULATOR=1   PWRMGMT=0   API=nongory SINGLEBITFLAGS=0"
			
 
				+
			
 
				+RCCE_admin.o: $(RCCE_LIB_SRC)/RCCE_admin.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h \
			
 
				+        $(RCCEINCLUDE)/RCCE_lib_pwr.h
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_admin.c  $(RCCE_FLAGS) 
			
 
				+
			
 
				+RCCE_power_management.o: $(RCCE_LIB_SRC)/RCCE_power_management.c $(RCCEINCLUDE)/RCCE.h \
			
 
				+         $(RCCEINCLUDE)/RCCE_lib.h $(RCCEINCLUDE)/SCC_API.h $(RCCEINCLUDE)/RCCE_lib_pwr.h
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_power_management.c  $(RCCE_FLAGS) 
			
 
				+
			
 
				+RCCE_debug.o: $(RCCE_LIB_SRC)/RCCE_debug.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h \
			
 
				+         $(RCCEINCLUDE)/RCCE_debug.h
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_debug.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_comm.o: $(RCCE_LIB_SRC)/RCCE_comm.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_comm.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_send.o: $(RCCE_LIB_SRC)/RCCE_send.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_send.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_recv.o: $(RCCE_LIB_SRC)/RCCE_recv.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_recv.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_memcpy.o: $(RCCE_LIB_SRC)/RCCE_memcpy.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_memcpy.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_get.o: $(RCCE_LIB_SRC)/RCCE_get.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_get.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_put.o: $(RCCE_LIB_SRC)/RCCE_put.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_put.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_reduce.o: $(RCCE_LIB_SRC)/RCCE_reduce.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_reduce.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_bcast.o: $(RCCE_LIB_SRC)/RCCE_bcast.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_bcast.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_malloc.o: $(RCCE_LIB_SRC)/RCCE_malloc.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_malloc.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_shmalloc.o: $(RCCE_LIB_SRC)/RCCE_shmalloc.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_shmalloc.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_qsort.o: $(RCCE_LIB_SRC)/RCCE_qsort.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_qsort.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_synch.o: $(RCCE_LIB_SRC)/RCCE_synch.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_synch.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_flags.o: $(RCCE_LIB_SRC)/RCCE_flags.c $(RCCEINCLUDE)/RCCE.h $(RCCEINCLUDE)/RCCE_lib.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_flags.c  $(RCCE_FLAGS)
			
 
				+
			
 
				+RCCE_emulator_driver.o: $(RCCE_LIB_SRC)/RCCE_emulator_driver.c $(RCCEINCLUDE)/RCCE.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_emulator_driver.c $(RCCE_FLAGS) 
			
 
				+
			
 
				+SCC_API.o: $(RCCE_LIB_SRC)/SCC_API.c $(RCCEINCLUDE)/SCC_API.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/SCC_API.c
			
 
				+
			
 
				+RCCE_DCMflush.o: $(RCCE_LIB_SRC)/RCCE_DCMflush.c $(RCCEINCLUDE)/SCC_API.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/RCCE_DCMflush.c
			
 
				+
			
 
				+mpb.o: $(RCCE_LIB_SRC)/mpb.c $(RCCEINCLUDE)/SCC_API.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) $(RCCE_LIB_SRC)/mpb.c
			
 
				+
			
 
				+# mpb: link mpb.o and SCC_API.o into the mpb executable under
			
 
				+# $(RCCEROOT)/bin/$(SUBDIR), then remove all object files in this directory.
			
 
				+mpb: mpb.o SCC_API.o
			
 
				+	$(CCOMPILE) $(CFLAGS) mpb.o SCC_API.o -o $(RCCEROOT)/bin/$(SUBDIR)/mpb
			
 
				+	rm -f *.o
			
 
				+
			
 
				+# clean: remove the archive and its objects, the mpb object and executable,
			
 
				+# and every static library found under bin/<subdir>/.
			
 
				+clean:
			
 
				+	rm -f $(ARCHIVE) $(ARCHIVEOBJS)
			
 
				+	rm -f mpb.o $(RCCEROOT)/bin/$(SUBDIR)/mpb
			
 
				+	rm -f bin/*/*.a
			
 
				+
			
 
				+# veryclean: everything "clean" removes, plus the build products of the listed
			
 
				+# apps and the generated files common/symbols, rccerun and makeall.
			
 
				+veryclean: 
			
 
				+	rm -f $(ARCHIVE) $(ARCHIVEOBJS)
			
 
				+	rm -f mpb.o $(RCCEROOT)/bin/$(SUBDIR)/mpb
			
 
				+	rm -f bin/*/*.a
			
 
				+	cd apps/SHIFT;    make clean; cd -
			
 
				+	cd apps/STENCIL;  make clean; cd -
			
 
				+	cd apps/NPB;      make clean; cd -
			
 
				+	cd apps/PINGPONG; make clean; cd -
			
 
				+	cd apps/XHPL;     make veryclean; cd -
			
 
				+	cd apps/SHARE;    make clean; cd -
			
 
				+	rm -f common/symbols rccerun makeall
			
 
				+	@echo --------------------------------------------------------------------
			
 
				+	@echo RUN \"configure\" SCRIPT \(AGAIN\) BEFORE MAKING EXECUTABLES + LIBRARIES
			
 
				+	@echo --------------------------------------------------------------------        
			
--- a/RCCE_V2.0/README
+++ b/RCCE_V2.0/README
@@ -0,0 +1,153 @@
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//
			
 
				+
			
 
				+Welcome to RCCE, a communication environment for the SCC processor 
			
 
				+------------------------------------------------------------------
			
 
				+
			
 
				+RCCE is designed to run on a variety of platforms including:
			
 
				+
			
 
				+  * Baremetal on the SCC chip, 
			
 
				+  * Linux on the SCC chip, 
			
 
				+  * A functional emulator running on top of OpenMP.  
			
 
				+
			
 
				+============ test line ============
			
 
				+This particular release has been validated only for Linux and the 
			
 
				+OpenMP emulator (a baremetal build option is available, but has
			
 
				+not been tested). It may seem a bit cumbersome to work with, but 
			
 
				+that's so we can replicate the "features" of the SCC chip ... i.e. 
			
 
				+once a program runs on the emulator, it's likely to work on real 
			
 
				+hardware.
			
 
				+
			
 
				+There are several versions of the RCCE library that can be 
			
 
				+built with this release.  They expose different options ...
			
 
				+
			
 
				+   * The "gory" interface ... this is the low level interface. It 
			
 
				+     makes the programmer responsible for declaring and managing
			
 
				+     synchronization flags and for managing the on-chip message 
			
 
				+     passing buffer. This mode gives access to the low level 
			
 
				+     get/put routines,  as well as to the higher level two-sided 
			
 
				+     send/receive interface.
			
 
				+
			
 
				+   * The "nongory" interface ... a higher level interface that
			
 
				+     hides the particulars of the message passing buffers and 
			
 
				+     inter-core synchronization from the programmer, including the 
			
 
				+     management of synchronization flags. This interface does not 
			
 
				+     give access to the low level put/get routines.
			
 
				+
			
 
				+   * Big Flags ... each flag used to coordinate interaction 
			
 
				+     between units of execution (UE) takes up a byte in a single cacheline. 
			
 
				+     This has lower latency but wastes memory.
			
 
				+
			
 
				+   * Small flags ... flags are stored in a single bit; many are
			
 
				+     packed into a single cache line. A slight hit on latency but 
			
 
				+     consumes less message passing buffer memory.
			
 
				+
			
 
				+   * With or without software controlled power management. POWER
			
 
				+     MANAGEMENT IS AN EXPERIMENTAL FEATURE THAT HAS NOT BEEN TESTED 
			
 
				+     AS THOROUGHLY AS THE REST OF THE LIBRARY. FOLLOW THE SPECIAL
			
 
				+     INSTRUCTIONS BELOW TO CONFIGURE THE "makeall" SCRIPT TO BUILD 
			
 
				+     VERSIONS OF THE LIBRARY THAT INCLUDE THE POWER MANAGEMENT API.
			
 
				+
			
 
				+
			
 
				+You can build all versions of the library supported with this 
			
 
				+release, as follows:
			
 
				+
			
 
				+1. Type "./configure <PLATFORM>". This creates file common/symbols 
			
 
				+   from file common/symbols.in, inserting the proper root of the 
			
 
				+   directory tree, and also inserting the proper platform (SCC_LINUX,
			
 
				+   SCC_BAREMETAL, or emulator). Any existing file common/symbols 
			
 
				+   will be overwritten, so do not update that file by hand. Instead,
			
 
				+   specify details of your build environment in common/symbols.in.
			
 
				+   The configure utility also specializes the rccerun command (see 
			
 
				+   below) for the target platform. You may need to make the configure
			
 
				+   script executable (type "chmod u+x configure").
			
 
				+   To enable RCCE's power management API, you must specify the string
			
 
				+   "ADD_POWER_API" as the second parameter on the command line when 
			
 
				+   you execute the configure script. Because this is an experimental 
			
 
				+   feature, it is not built by default. See above.
			
 
				+
			
 
				+2. Type "./makeall" to build all libraries. Alternatively you 
			
 
				+   can build individual libraries by calling make directly.  
			
 
				+   Type "make usage" to discover the libraries you can build.
			
 
				+
			
 
				+The libraries generated by this procedure will be put in the directory
			
 
				+
			
 
				+    bin/<PLATFORM>
			
 
				+
			
 
				+An easy way to test correct operation of the platform is to build and
			
 
				+run a prepackaged RCCE stress test after building the RCCE library:
			
 
				+   "./build_stress_test; ./run_stress_test <size>"
			
 
				+where size is -S (small), -M (medium), or -L (large).
			
 
				+
			
 
				+A number of applications are included with this release in the
			
 
				+"apps" directory.  These include:  
			
 
				+
			
 
				+  * PINGPONG:  bounces messages between a pair of UEs
			
 
				+  * SHIFT:     passes messages around a logical ring of UEs
			
 
				+  * STENCIL:   solves a simple PDE with a basic stencil code
			
 
				+  * SHARE:     tests the off-chip shared memory access
			
 
				+  * NPB:       NAS Parallel Benchmarks, LU and BT
			
 
				+  * XHPL:      the Linpack benchmark
			
 
				+
			
 
				+To build an application,  go to the corresponding subdirectory of "apps" 
			
 
				+and type "make".   It will return a list of options for building
			
 
				+versions of an application. It may be necessary to edit the Makefile
			
 
				+in an application directory if parts of the original RCCE code tree 
			
 
				+got moved with respect to each other.
			
 
				+
			
 
				+We suggest that you start with PINGPONG, SHIFT, SHARE, and STENCIL and 
			
 
				+save the more complex NPB and XHPL for later. The STENCIL directory 
			
 
				+contains a few simple variations of the base code that exercise RCCE's 
			
 
				+experimental power management API.
			
 
				+See the apps/XHPL directory for instructions on how to build and run
			
 
				+Linpack.
			
 
				+
			
 
				+To run an application, you must use the rccerun command.  This command
			
 
				+is used to launch Linux jobs on SCC or on the emulator. To run application 
			
 
				+APP with P cores, type
			
 
				+
			
 
				+"rccerun -nue P -f HOSTFILE APP [application parameters]"
			
 
				+
			
 
				+where HOSTFILE contains the list of physical core IDs to be used. 
			
 
				+By default, the host file "./hosts/rc.hosts" should be used. 
			
 
				+You can see an example of the use of rccerun in the run_stencil and 
			
 
				+run_stencil_synch shell scripts in the "apps/STENCIL" directory. 
			
 
				+
			
 
				+If my_script is a shell script that contains the actual RCCE executable 
			
 
				+RCCE_X (which may take application parameters), make sure to execute it as 
			
 
				+follows inside the script: "/path_to_RCCE_X/RCCE_X $@". This is necessary 
			
 
				+so that all the parameters to the program, including those added by
			
 
				+rccerun, are supplied to the executable.
			
 
				+
			
 
				+Example: 
			
 
				+Shell script my_script contains executable RCCE_X that expects two 
			
 
				+parameters, n and m. Write the script as:
			
 
				+----------start of my_script---------
			
 
				+line 1
			
 
				+line 2
			
 
				+line ...
			
 
				+./RCCE_X $@
			
 
				+line ...
			
 
				+----------end of my_script-----------
			
 
				+To run the code on P cores of the SCC, type
			
 
				+
			
 
				+"rccerun -nue P -f HOSTFILE my_script m n"
			
 
				+
			
 
				+MANPAGES
			
 
				+This release of RCCE has manpages. To access those manpages, add a path to 
			
 
				+your MANPATH as follows:
			
 
				+export MANPATH="<path to where you installed RCCE>/man:${MANPATH}"
			
--- a/RCCE_V2.0/apps/.svn/all-wcprops
+++ b/RCCE_V2.0/apps/.svn/all-wcprops
@@ -0,0 +1,17 @@
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 48
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps
			
 
				+END
			
 
				+hpl
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 52
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/hpl
			
 
				+END
			
 
				+README
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 55
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/README
			
 
				+END
			
--- a/RCCE_V2.0/apps/.svn/entries
+++ b/RCCE_V2.0/apps/.svn/entries
@@ -0,0 +1,123 @@
 
				+10
			
 
				+
			
 
				+dir
			
 
				+313
			
 
				+http://marcbug.scc-dc.com/svn/repository/tags/RCCE_V2.0/apps
			
 
				+http://marcbug.scc-dc.com/svn/repository
			
 
				+
			
 
				+
			
 
				+
			
 
				+2011-04-11T21:00:28.037293Z
			
 
				+188
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+c924d837-3317-4ba4-8fbd-5f2da8699d51
			
 
				+
			
 
				+NPB
			
 
				+dir
			
 
				+
			
 
				+FLUSH
			
 
				+dir
			
 
				+
			
 
				+PINGPONG
			
 
				+dir
			
 
				+
			
 
				+STENCIL
			
 
				+dir
			
 
				+
			
 
				+SHARE
			
 
				+dir
			
 
				+
			
 
				+HELLO
			
 
				+dir
			
 
				+
			
 
				+hpl
			
 
				+file
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-10-27T13:42:43.852598Z
			
 
				+65c154fb251179d50086a65103a70c47
			
 
				+2010-07-16T00:05:27.319040Z
			
 
				+32
			
 
				+tekubasx
			
 
				+has-props
			
 
				+
			
 
				+
			
 
				+svn:special
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+4
			
 
				+
			
 
				+ECOQ
			
 
				+dir
			
 
				+
			
 
				+XHPL
			
 
				+dir
			
 
				+
			
 
				+SHIFT
			
 
				+dir
			
 
				+
			
 
				+README
			
 
				+file
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-10-27T13:42:43.852598Z
			
 
				+1854a34c919d87c38c13f947b88353ca
			
 
				+2010-12-27T18:51:02.240775Z
			
 
				+131
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+62
			
 
				+
			
--- a/RCCE_V2.0/apps/.svn/prop-base/hpl.svn-base
+++ b/RCCE_V2.0/apps/.svn/prop-base/hpl.svn-base
@@ -0,0 +1,5 @@
 
				+K 11
			
 
				+svn:special
			
 
				+V 1
			
 
				+*
			
 
				+END
			
--- a/RCCE_V2.0/apps/.svn/text-base/README.svn-base
+++ b/RCCE_V2.0/apps/.svn/text-base/README.svn-base
@@ -0,0 +1 @@
 
				+keep link hpl->XHPL intact, it is necessary to build LINPACK.
			
--- a/RCCE_V2.0/apps/.svn/text-base/hpl.svn-base
+++ b/RCCE_V2.0/apps/.svn/text-base/hpl.svn-base
@@ -0,0 +1 @@
 
				+link XHPL
			
--- a/RCCE_V2.0/apps/ECOQ/.svn/all-wcprops
+++ b/RCCE_V2.0/apps/ECOQ/.svn/all-wcprops
@@ -0,0 +1,35 @@
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 53
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/ECOQ
			
 
				+END
			
 
				+RCCE_pwr_wq_framework.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 77
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/ECOQ/RCCE_pwr_wq_framework.c
			
 
				+END
			
 
				+Makefile
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 62
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/ECOQ/Makefile
			
 
				+END
			
 
				+README
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 60
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/ECOQ/README
			
 
				+END
			
 
				+RCCE_pwr_wq.h
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 67
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/ECOQ/RCCE_pwr_wq.h
			
 
				+END
			
 
				+RCCE_eco_q.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 66
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/ECOQ/RCCE_eco_q.c
			
 
				+END
			
--- a/RCCE_V2.0/apps/ECOQ/.svn/entries
+++ b/RCCE_V2.0/apps/ECOQ/.svn/entries
@@ -0,0 +1,198 @@
 
				+10
			
 
				+
			
 
				+dir
			
 
				+313
			
 
				+http://marcbug.scc-dc.com/svn/repository/tags/RCCE_V2.0/apps/ECOQ
			
 
				+http://marcbug.scc-dc.com/svn/repository
			
 
				+
			
 
				+
			
 
				+
			
 
				+2011-03-01T00:12:36.950662Z
			
 
				+165
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+c924d837-3317-4ba4-8fbd-5f2da8699d51
			
 
				+
			
 
				+RCCE_pwr_wq.h
			
 
				+file
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-10-27T13:42:39.092598Z
			
 
				+6a2d46e1d1c182bf4bd5d38b38e62987
			
 
				+2011-03-01T00:11:03.930222Z
			
 
				+164
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+572
			
 
				+
			
 
				+RCCE_eco_q.c
			
 
				+file
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-10-27T13:42:39.092598Z
			
 
				+dbdce8ea42af7845ecd31f6cd6457698
			
 
				+2011-03-01T00:11:03.930222Z
			
 
				+164
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+12193
			
 
				+
			
 
				+RCCE_pwr_wq_framework.c
			
 
				+file
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-10-27T13:42:39.092598Z
			
 
				+caee278009153ed29bd317577efadb6e
			
 
				+2011-03-01T00:11:03.930222Z
			
 
				+164
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+6008
			
 
				+
			
 
				+Makefile
			
 
				+file
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-10-27T13:42:39.092598Z
			
 
				+6b3e73f6d2478f93649bed6388cf7605
			
 
				+2011-03-01T00:11:03.930222Z
			
 
				+164
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+521
			
 
				+
			
 
				+README
			
 
				+file
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-10-27T13:42:39.092598Z
			
 
				+8a3d52ef6996ffed15a25ad4b5d13f02
			
 
				+2011-03-01T00:12:36.950662Z
			
 
				+165
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+71
			
 
				+
			
--- a/RCCE_V2.0/apps/ECOQ/.svn/text-base/Makefile.svn-base
+++ b/RCCE_V2.0/apps/ECOQ/.svn/text-base/Makefile.svn-base
@@ -0,0 +1,24 @@
 
				+SHELL=sh
			
 
				+
			
 
				+RCCEROOT=../..
			
 
				+include $(RCCEROOT)/common/symbols
			
 
				+
			
 
				+ECOQOBJS=RCCE_eco_q.o RCCE_pwr_wq_framework.o $(ARCHIVE)
			
 
				+
			
 
				+default:
			
 
				+	@echo Usage: make PWRMGMT=1 eco_q [clean]
			
 
				+
			
 
				+eco_q: $(ECOQOBJS)
			
 
				+	$(CCOMPILE) -o eco_q $(ECOQOBJS) $(CFLAGS)
			
 
				+
			
 
				+RCCE_eco_q.o: RCCE_eco_q.c $(RCCEINCLUDE)/RCCE.h RCCE_pwr_wq.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) RCCE_eco_q.c  
			
 
				+
			
 
				+RCCE_pwr_wq_framework.o: RCCE_pwr_wq_framework.c $(RCCEINCLUDE)/RCCE.h RCCE_pwr_wq.h
			
 
				+	$(CCOMPILE) -c $(CFLAGS) RCCE_pwr_wq_framework.c  
			
 
				+
			
 
				+clean:
			
 
				+	@ rm -f	*.o wq eco_q FV_reset
			
 
				+
			
 
				+
			
 
				+
			
--- a/RCCE_V2.0/apps/ECOQ/.svn/text-base/RCCE_eco_q.c.svn-base
+++ b/RCCE_V2.0/apps/ECOQ/.svn/text-base/RCCE_eco_q.c.svn-base
@@ -0,0 +1,332 @@
 
				+/* this synthetic application assumes a three-dimensional
			
 
				+   domain of  nx*ny*nz points that is decomposed into chunks
			
 
				+   of different size, and that require different amounts
			
 
				+   of computational work.
			
 
				+*/
			
 
				+ 
			
 
				+#include "RCCE.h"
			
 
				+#include "RCCE_pwr_wq.h"
			
 
				+#include <stdio.h>
			
 
				+ 
			
 
				+#define min(x,y) ( (x) < (y) ? (x) : (y) )
			
 
				+#define max(x,y) ( (x) > (y) ? (x) : (y) )
			
 
				+ 
			
 
				+int power_change = 1;
			
 
				+int BASE_F = 5; /* baseline clock divider (320 MHz) */
			
 
				+int HIGH_F = 3; /* high CPU clock divider (533 MHz) */
			
 
				+void read_and_prep_data(int, int, int, int, double*);
			
 
				+void do_work(int, int, int, int, int, int, int, 
			
 
				+             double*, double*, double*, double*, RCCE_REQUEST*);
			
 
				+ 
			
 
				+#define NX     200
			
 
				+#define NY     200
			
 
				+#define NZ     100
			
 
				+#define NCOMP  5
			
 
				+#define NITER  10
			
 
				+#define XZONEJAGS 4
			
 
				+#define YZONEJAGS 4
			
 
				+#define STEP 3
			
 
				+ 
			
 
				+typedef struct {
			
 
				+  struct {
			
 
				+    int seq_number;
			
 
				+  } dynamic_part;
			
 
				+  int npx;
			
 
				+  int npy;
			
 
				+  int kstart;
			
 
				+  int kend;
			
 
				+  int kwidth;
			
 
				+  int left;
			
 
				+  int right;
			
 
				+  int *isize;
			
 
				+  int *jsize;
			
 
				+  int *ksize;
			
 
				+  RCCE_REQUEST *request;
			
 
				+} WORK_ITEM;
			
 
				+ 
			
 
				+int RCCE_WI_size(void *work_item) {
			
 
				+  return(sizeof(((WORK_ITEM *)work_item)->dynamic_part));
			
 
				+}
			
 
				+ 
			
 
				+int RCCE_WI_valid(void *work_item) {
			
 
				+  return(((WORK_ITEM *)work_item)->dynamic_part.seq_number>=0);
			
 
				+}
			
 
				+ 
			
 
				+void *RCCE_WI_address(void *work_item) {
			
 
				+  return((void *)(&(((WORK_ITEM *)work_item)->dynamic_part)));
			
 
				+}
			
 
				+int RCCE_APP(int argc, char **argv){
			
 
				+ 
			
 
				+  int       *isize, *jsize, *ksize; 
			
 
				+  int       ID, NP;
			
 
				+  int       npx, npy, ix, iy, kstart, kend, kwidth, nrounds;
			
 
				+  int       i, j, k, mem, ue, iter, fdiv, vlevel;
			
 
				+  int       *team_member, team_size, team_lead, size, local_rank, 
			
 
				+      left, right, master, master_number, *master_list;
			
 
				+  QUEUE_PARMS wq_pars;
			
 
				+  WORK_ITEM work_item;
			
 
				+  RCCE_REQUEST request;
			
 
				+ 
			
 
				+  RCCE_init(&argc, &argv);
			
 
				+  NP = wq_pars.NP = RCCE_num_ues();
			
 
				+  ID = wq_pars.ID = RCCE_ue();
			
 
				+ 
			
 
				+  if (argc < 4) {
			
 
				+    if (ID==0) printf("Error: Need two parameters, x & y tiles, plus # rounds\n");
			
 
				+    return(1);
			
 
				+  }
			
 
				+ 
			
 
				+/* read the number of subdomains (x & y-direction) from the command line        */
			
 
				+  npx = work_item.npx = atoi(*++argv);
			
 
				+  npy = work_item.npy =  atoi(*++argv);
			
 
				+ 
			
 
				+/* test validity of the requested tiling; each tile must be large enough to
			
 
				+   divide the z-dimension amoung the members of the team                       */
			
 
				+  if (npx <= 0 || npy <= 0 || npx > NX || npy > NY) {
			
 
				+    if (ID==0) printf("Illegal tiling: %d, %d\n", npx, npy);
			
 
				+    RCCE_finalize();
			
 
				+    return(1);
			
 
				+  }
			
 
				+  nrounds = atoi(*++argv);
			
 
				+  if (nrounds <= 0) {power_change=0; nrounds = -nrounds;}
			
 
				+  
			
 
				+  RCCE_debug_set(RCCE_DEBUG_ALL);
			
 
				+  /* lower power req until we need it                                           */
			
 
				+  if (power_change) RCCE_iset_power(BASE_F, &request, &fdiv, &vlevel);
			
 
				+ 
			
 
				+  /* form teams; copy results to local variables                                */
			
 
				+  RCCE_setup_work_queue_teams(&wq_pars); 
			
 
				+  master      = wq_pars.master;
			
 
				+  team_lead   = wq_pars.team_lead;
			
 
				+  local_rank  = wq_pars.local_rank;
			
 
				+  team_size   = wq_pars.team_size;
			
 
				+  team_member = wq_pars.team_member;
			
 
				+  master_list = wq_pars.master_list;  
			
 
				+ 
			
 
				+  if (team_size > NZ) {
			
 
				+    if (ID==0) printf("Error: NZ too small: %d\n", NZ);
			
 
				+    RCCE_finalize();
			
 
				+    return(1);
			
 
				+  }
			
 
				+ 
			
 
				+  /* define left and right neighbors                                            */
			
 
				+  if (local_rank>0)           work_item.left  = team_member[local_rank-1];
			
 
				+  else                        work_item.left  = -1;
			
 
				+  if (local_rank<team_size-1) work_item.right = team_member[local_rank+1];
			
 
				+  else                        work_item.right = -1;
			
 
				+ 
			
 
				+  if (ID != master) {
			
 
				+    /* allocate space for the sizes of the subdomains                           */
			
 
				+    isize = (int *) malloc(sizeof(int)*npx);
			
 
				+    jsize = (int *) malloc(sizeof(int)*npy);  
			
 
				+    ksize = (int *) malloc(sizeof(int)*team_size);
			
 
				+    if (!isize || !jsize || !ksize) {
			
 
				+      printf("Could not allocate space for tile sizes\n");
			
 
				+      return(1);
			
 
				+    }
			
 
				+ 
			
 
				+    for (k=0; k<team_size; k++) {
			
 
				+      ksize[k] = NZ/team_size;
			
 
				+      /* adjust for any leftover points                                         */
			
 
				+      if (k<(NZ%team_size)) ksize[k]++;
			
 
				+    }
			
 
				+    for (kstart=0, k=0; k<local_rank; k++) kstart += ksize[k];
			
 
				+    kend = kstart + ksize[local_rank] -1;
			
 
				+    kwidth = work_item.kwidth = ksize[local_rank]+2;
			
 
				+    work_item.kstart = kstart;
			
 
				+    work_item.kend   = kend;
			
 
				+ 
			
 
				+    /* introduce load imbalance among subdomains by perturbing their sizes      */
			
 
				+    for (i=0; i<npx-1; i++) isize[i] = NX/npx;
			
 
				+    isize[npx-1] = NX-(NX/npx)*(npx-1);
			
 
				+    for (iter=0; iter<XZONEJAGS; iter++) 
			
 
				+    for (i=1; i<npx; i+=2) if (isize[i-1] > i) {
			
 
				+      isize[i-1] -= i;
			
 
				+      isize[i]   += i;
			
 
				+    }
			
 
				+    for (j=0; j<npy-1; j++) jsize[j] = NY/npy;
			
 
				+    jsize[npy-1] = NY-(NY/npy)*(npy-1);
			
 
				+    for (iter=0; iter<YZONEJAGS; iter++) 
			
 
				+    for (j=1; j<npy; j+=2) if (jsize[j-1] > j) {
			
 
				+      jsize[j-1] -= j;
			
 
				+      jsize[j]   += j;
			
 
				+    }
			
 
				+  }
			
 
				+ 
			
 
				+  work_item.dynamic_part.seq_number = 0;
			
 
				+  work_item.request = &request;
			
 
				+  work_item.isize = isize;
			
 
				+  work_item.jsize = jsize;
			
 
				+  work_item.ksize = ksize;
			
 
				+ 
			
 
				+  WORK_ITEM *wi = &work_item;
			
 
				+ 
			
 
				+/* master goes into a loop, servicing work requests                             */
			
 
				+  if (ID==master) {
			
 
				+    int tasks_completed = 0;
			
 
				+    while (tasks_completed<nrounds) {
			
 
				+      tasks_completed += RCCE_queue_master_loop((void *)&work_item, &wq_pars);
			
 
				+    }
			
 
				+    /* master creates one more work loop to end all teams                       */
			
 
				+    work_item.dynamic_part.seq_number = -1;
			
 
				+    RCCE_queue_master_loop((void *)&work_item, &wq_pars);
			
 
				+  }
			
 
				+ 
			
 
				+/* teams go into an endless loop, executing tasks and asking for new 
			
 
				+   ones when they are done                                                      */
			
 
				+ 
			
 
				+  else {
			
 
				+    int error = 0;
			
 
				+    while (!error) {
			
 
				+      error=RCCE_queue_member_loop((void *)(&work_item), &wq_pars);
			
 
				+    }
			
 
				+  }
			
 
				+  
			
 
				+  RCCE_finalize();
			
 
				+  return (0);
			
 
				+}
			
 
				+ 
			
 
				+int RCCE_execute_work_item(void *work_item, QUEUE_PARMS *wq_pars) {
			
 
				+ 
			
 
				+  int ix, iy, words, fdiv, vlevel;
			
 
				+  double *data_frame, *flux_x, *flux_y, *flux_z;
			
 
				+  WORK_ITEM *wi;
			
 
				+  wi = (WORK_ITEM *)work_item;
			
 
				+    
			
 
				+  ix = (wi->dynamic_part.seq_number)%(wi->npx);
			
 
				+  iy = (wi->dynamic_part.seq_number)/(wi->npx);
			
 
				+  words = wi->isize[ix]*wi->jsize[iy]*(wi->kwidth)*NCOMP;
			
 
				+  data_frame = (double *) malloc(4*words*sizeof(double));
			
 
				+  if (!data_frame) {
			
 
				+    printf("Could not allocate %d words on UE %d\n", words, RCCE_ue());
			
 
				+    return(1);
			
 
				+  }
			
 
				+  flux_x = data_frame + 1*words;
			
 
				+  flux_y = data_frame + 2*words;
			
 
				+  flux_z = data_frame + 3*words;
			
 
				+  read_and_prep_data(wi->isize[ix], wi->jsize[iy], wi->kstart, wi->kend, data_frame);
			
 
				+  /* entering a high-cpu-intensity segment of the code  */
			
 
				+  if (power_change) RCCE_wait_power(wi->request);
			
 
				+  if (power_change) RCCE_iset_power(HIGH_F, wi->request, &fdiv, &vlevel);
			
 
				+  do_work(wi->isize[ix], wi->jsize[iy], wi->kstart, wi->kend, wi->left, wi->right, 
			
 
				+          wq_pars->local_rank, data_frame, flux_x, flux_y, flux_z, wi->request);
			
 
				+  free(data_frame);
			
 
				+  return(0);
			
 
				+}
			
 
				+ 
			
 
				+ 
			
 
				+#define FR(c,i,j,k) data_frame[(c)+NCOMP*((i)+in*((j)+(k-kstart+1)*jn))]
			
 
				+ 
			
 
				+void read_and_prep_data(int in, int jn, int kstart, int kend, double *data_frame) {
			
 
				+  int i, j, k, c;
			
 
				+ 
			
 
				+  /* initialize with smooth data */
			
 
				+  for (k=kstart; k<=kend; k++) for (j=0; j<jn; j++) for (i=0; i<in; i++) {
			
 
				+    FR(0,i,j,k) = 1.0;
			
 
				+    FR(1,i,j,k) = (double)(k-j)+10.0;
			
 
				+    FR(2,i,j,k) = (double)(i-k)+20.0;
			
 
				+    FR(3,i,j,k) = (double)(j-i)+30.0;
			
 
				+    FR(4,i,j,k) = 100.0;
			
 
				+  }
			
 
				+ 
			
 
				+  /* add jaggedness */
			
 
				+  for (k=kstart; k<=kend; k++) {
			
 
				+    for (j=0; j<jn; j+=2) {
			
 
				+      for (i=0; i<in; i+=2) for (c=0; c<NCOMP; c++) FR(c,i,j,k) -= 1.0;
			
 
				+      for (i=1; i<in; i+=2) for (c=0; c<NCOMP; c++) FR(c,i,j,k) += 1.0;
			
 
				+    }
			
 
				+    for (j=1; j<jn; j+=2) {
			
 
				+      for (i=0; i<in; i+=2) for (c=0; c<NCOMP; c++) FR(c,i,j,k) -= 1.0;
			
 
				+      for (i=1; i<in; i+=2) for (c=0; c<NCOMP; c++) FR(c,i,j,k) += 1.0;
			
 
				+    }
			
 
				+  }
			
 
				+  return;
			
 
				+}
			
 
				+ 
			
 
				+#define FLUX_X(c,i,j,k) flux_x[(c)+NCOMP*((i)+in*((j)+(k-kstart+1)*jn))]
			
 
				+#define FLUX_Y(c,i,j,k) flux_y[(c)+NCOMP*((i)+in*((j)+(k-kstart+1)*jn))]
			
 
				+#define FLUX_Z(c,i,j,k) flux_z[(c)+NCOMP*((i)+in*((j)+(k-kstart+1)*jn))]
			
 
				+ 
			
 
				+void do_work(int in, int jn, int kstart, int kend, int left, int right, int rank,
			
 
				+             double *data_frame, double *flux_x, double *flux_y, double *flux_z,
			
 
				+             RCCE_REQUEST *request) {
			
 
				+ 
			
 
				+  int i, j, k, c, iter, phase, fdiv, vlevel;
			
 
				+  double vx = 1.0, vy = 1.0, vz = 1.0;
			
 
				+  double dt = 0.0001;
			
 
				+  double mu = 1.0;
			
 
				+ 
			
 
				+  for (iter=0; iter<NITER; iter++) {
			
 
				+ 
			
 
				+    if (iter==2 && power_change) {
			
 
				+      RCCE_wait_power(request);
			
 
				+    }
			
 
				+    if (iter==NITER-2 & power_change) {
			
 
				+      RCCE_iset_power(BASE_F, request, &fdiv, &vlevel);
			
 
				+    }
			
 
				+    /* before each iteration we need to fill ghost points with neighbor data */
			
 
				+    for (phase=0; phase<2; phase++) {
			
 
				+      if (right != -1 && (rank+phase+1)%2) {
			
 
				+         RCCE_send((char *)(&FR(0,0,0,kend)),in*jn*NCOMP*sizeof(double), right);
			
 
				+      }
			
 
				+      if (left  != -1 && (rank+phase)%2) {
			
 
				+         RCCE_recv((char *)(&FR(0,0,0,kstart-1)),in*jn*NCOMP*sizeof(double), left);
			
 
				+      }
			
 
				+    }
			
 
				+    for (phase=0; phase<2; phase++) {
			
 
				+      if (left != -1 && (rank+phase+1)%2)
			
 
				+         RCCE_send((char *)(&FR(0,0,0,kstart)),in*jn*NCOMP*sizeof(double), left);
			
 
				+      if (right  != -1 && (rank+phase)%2) 
			
 
				+         RCCE_recv((char *)(&FR(0,0,0,kend+1)),in*jn*NCOMP*sizeof(double), right);
			
 
				+    }
			
 
				+    for (k=max(kstart,1); k<=min(NZ-2,kend); k++) for (j=1; j<jn-1; j++) 
			
 
				+    for (i=1; i<in-1; i++) 
			
 
				+    for (c=0; c<NCOMP; c++){
			
 
				+      FLUX_X(c,i,j,k) = 
			
 
				+        (3.0*FR(c,i+1,j+1,k  ) - 4.0*FR(c,i,j+1,k  ) + FR(c,i-1,j+1,k  ))/16.0 +
			
 
				+        (3.0*FR(c,i+1,j  ,k+1) - 4.0*FR(c,i,j,  k+1) + FR(c,i-1,j,  k+1))/16.0 +
			
 
				+        (3.0*FR(c,i+1,j+1,k+1) - 4.0*FR(c,i,j+1,k+1) + FR(c,i-1,j+1,k+1))/32.0 +
			
 
				+        (3.0*FR(c,i+1,j-1,k  ) - 4.0*FR(c,i,j-1,k  ) + FR(c,i-1,j-1,k  ))/16.0 +
			
 
				+        (3.0*FR(c,i+1,j  ,k-1) - 4.0*FR(c,i,j,  k-1) + FR(c,i-1,j,  k-1))/16.0 +
			
 
				+        (3.0*FR(c,i+1,j-1,k-1) - 4.0*FR(c,i,j-1,k-1) + FR(c,i-1,j-1,k-1))/32.0 +
			
 
				+        (3.0*FR(c,i+1,j-1,k+1) - 4.0*FR(c,i,j-1,k+1) + FR(c,i-1,j-1,k+1))/32.0 +
			
 
				+        (3.0*FR(c,i+1,j+1,k-1) - 4.0*FR(c,i,j+1,k-1) + FR(c,i-1,j+1,k-1))/32.0 +
			
 
				+        (3.0*FR(c,i+1,j  ,k  ) - 4.0*FR(c,i,j,  k  ) + FR(c,i-1,j,  k  ))/8.0;
			
 
				+  
			
 
				+      FLUX_Y(c,i,j,k) = 
			
 
				+        (3.0*FR(c,i+1,j+1,k  ) - 4.0*FR(c,i+1,j,k  ) + FR(c,i+1,j-1,k  ))/16.0 +
			
 
				+        (3.0*FR(c,i  ,j+1,k+1) - 4.0*FR(c,i  ,j,k+1) + FR(c,i  ,j-1,k+1))/16.0 +
			
 
				+        (3.0*FR(c,i+1,j+1,k+1) - 4.0*FR(c,i+1,j,k+1) + FR(c,i+1,j-1,k+1))/32.0 +
			
 
				+        (3.0*FR(c,i-1,j+1,k  ) - 4.0*FR(c,i-1,j,k  ) + FR(c,i-1,j-1,k  ))/16.0 +
			
 
				+        (3.0*FR(c,i  ,j+1,k-1) - 4.0*FR(c,i  ,j,k-1) + FR(c,i  ,j-1,k-1))/16.0 +
			
 
				+        (3.0*FR(c,i-1,j+1,k-1) - 4.0*FR(c,i-1,j,k-1) + FR(c,i-1,j-1,k-1))/32.0 +
			
 
				+        (3.0*FR(c,i-1,j+1,k+1) - 4.0*FR(c,i-1,j,k+1) + FR(c,i-1,j-1,k+1))/32.0 +
			
 
				+        (3.0*FR(c,i+1,j+1,k-1) - 4.0*FR(c,i+1,j,k-1) + FR(c,i+1,j-1,k-1))/32.0 +
			
 
				+        (3.0*FR(c,i  ,j+1,k  ) - 4.0*FR(c,i  ,j,k  ) + FR(c,i  ,j-1,k  ))/8.0;
			
 
				+  
			
 
				+      FLUX_Y(c,i,j,k) = 
			
 
				+        (3.0*FR(c,i+1,j  ,k+1) - 4.0*FR(c,i+1,j  ,k) + FR(c,i+1,j  ,k-1))/16.0 +
			
 
				+        (3.0*FR(c,i  ,j+1,k+1) - 4.0*FR(c,i  ,j+1,k) + FR(c,i  ,j+1,k-1))/16.0 +
			
 
				+        (3.0*FR(c,i+1,j+1,k+1) - 4.0*FR(c,i+1,j+1,k) + FR(c,i+1,j+1,k-1))/32.0 +
			
 
				+        (3.0*FR(c,i-1,j  ,k+1) - 4.0*FR(c,i-1,j  ,k) + FR(c,i-1,j  ,k-1))/16.0 +
			
 
				+        (3.0*FR(c,i  ,j-1,k+1) - 4.0*FR(c,i  ,j-1,k) + FR(c,i  ,j-1,k-1))/16.0 +
			
 
				+        (3.0*FR(c,i-1,j-1,k+1) - 4.0*FR(c,i-1,j-1,k) + FR(c,i-1,j-1,k-1))/32.0 +
			
 
				+        (3.0*FR(c,i-1,j+1,k+1) - 4.0*FR(c,i-1,j+1,k) + FR(c,i-1,j+1,k-1))/32.0 +
			
 
				+        (3.0*FR(c,i+1,j-1,k+1) - 4.0*FR(c,i+1,j-1,k) + FR(c,i+1,j-1,k-1))/32.0 +
			
 
				+        (3.0*FR(c,i  ,j  ,k+1) - 4.0*FR(c,i  ,j  ,k) + FR(c,i  ,j  ,k-1))/8.0;
			
 
				+  
			
 
				+      FR(c,i,j,k) += dt*(
			
 
				+         -1.0*(vx*FLUX_X(c,i,j,k) + vy*FLUX_Y(c,i,j,k) + vz*FLUX_Z(c,i,j,k)) + 
			
 
				+              FR(c,i+1,j,k) -2.0*FR(c,i,j,k) + FR(c,i-1,j,k) +
			
 
				+              FR(c,i,j+1,k) -2.0*FR(c,i,j,k) + FR(c,i,j-1,k) +
			
 
				+              FR(c,i,j,k+1) -2.0*FR(c,i,j,k) + FR(c,i,j,k-1));
			
 
				+    }
			
 
				+  }
			
 
				+  return;
			
 
				+}
			
 
				+ 
			
 
				+int RCCE_new_work_item(void *work_item, QUEUE_PARMS *wq_pars) {
			
 
				+  WORK_ITEM *wi = (WORK_ITEM *)work_item;
			
 
				+  wi->dynamic_part.seq_number = (wi->dynamic_part.seq_number+1)%(wi->npx*wi->npy);
			
 
				+  return(RCCE_SUCCESS);
			
 
				+}
			
--- a/RCCE_V2.0/apps/ECOQ/.svn/text-base/RCCE_pwr_wq.h.svn-base
+++ b/RCCE_V2.0/apps/ECOQ/.svn/text-base/RCCE_pwr_wq.h.svn-base
@@ -0,0 +1,23 @@
 
				+typedef struct {
			
 
				+  int NP;
			
 
				+  int ID;
			
 
				+  int master;
			
 
				+  int team_lead;
			
 
				+  int local_rank;
			
 
				+  int team_size;
			
 
				+  int team_member[RCCE_MAXNP];
			
 
				+  int master_list[RCCE_MAXNP];
			
 
				+  int master_number;
			
 
				+} QUEUE_PARMS;
			
 
				+ 
			
 
				+int RCCE_execute_work_item(void *, QUEUE_PARMS *);
			
 
				+int RCCE_setup_work_queue_teams(QUEUE_PARMS *);
			
 
				+int RCCE_queue_master_loop(void *, QUEUE_PARMS *);
			
 
				+int RCCE_new_work_item(void *, QUEUE_PARMS *);
			
 
				+int RCCE_queue_member_loop(void *, QUEUE_PARMS *);
			
 
				+int RCCE_WI_size(void *);
			
 
				+void *RCCE_WI_address(void *);
			
 
				+ 
			
 
				+#ifdef OPENMP_
			
 
				+#pragma omp threadprivate(power_change)
			
 
				+#endif
			
--- a/RCCE_V2.0/apps/ECOQ/.svn/text-base/RCCE_pwr_wq_framework.c.svn-base
+++ b/RCCE_V2.0/apps/ECOQ/.svn/text-base/RCCE_pwr_wq_framework.c.svn-base
@@ -0,0 +1,156 @@
 
				+#include "RCCE.h"
			
 
				+#include "RCCE_pwr_wq.h"
			
 
				+#include <stdio.h>
			
 
				+ 
			
 
				+int RCCE_WI_valid(void *);
			
 
				+int  RCCE_qsort(char *, size_t, size_t, int (*)(const void*, const void*));
			
 
				+/* comparison function used in routine to sort core IDs                  */
			
 
				+int id_compare(const void *e1, const void *e2);
			
 
				+ 
			
 
				+int RCCE_setup_work_queue_teams(QUEUE_PARMS *wq_pars){ 
			
 
				+ 
			
 
				+  int NP, ID, ue, size, mem, master, team_lead, team_size, local_rank;
			
 
				+  int test, isleader;
			
 
				+  int *team_member, *master_list;
			
 
				+ 
			
 
				+  NP = wq_pars->NP = RCCE_num_ues();
			
 
				+  ID = wq_pars->ID = RCCE_ue();
			
 
				+  team_member = wq_pars->team_member;
			
 
				+  master_list = wq_pars->master_list;
			
 
				+ 
			
 
				+/* determine the number of UEs in the local power domain and form teams         */
			
 
				+  wq_pars->team_size = team_size = RCCE_power_domain_size();
			
 
				+  wq_pars->team_lead = team_lead = RCCE_power_domain_master();
			
 
				+  if (team_lead == ID) {
			
 
				+    /* the team lead is the first team member                                   */
			
 
				+    team_member[0] = team_lead;
			
 
				+    size = 1;
			
 
				+    /* the team leads collects IDs from its team members ...                    */
			
 
				+    while (size<team_size) for (ue=0; ue<NP; ue++) if (ue != team_lead) {
			
 
				+      RCCE_recv_test((char *)(&(team_member[size])), sizeof(int), ue, &test);
			
 
				+      if (test) team_member[size++] = ue;
			
 
				+    }
			
 
				+    /* ... and sends the list to all other team members, after sorting it       */
			
 
				+    RCCE_qsort((char *)team_member, team_size, sizeof(int), id_compare);
			
 
				+    for (ue=1; ue<team_size; ue++) 
			
 
				+      RCCE_send((char *)team_member, team_size*sizeof(int), team_member[ue]);
			
 
				+  }
			
 
				+  else {
			
 
				+    /* team members check in with the team lead ...                             */
			
 
				+    RCCE_send((char *)(&ID), sizeof(int), team_lead);
			
 
				+    /* ... and receive the complete list of team members                        */
			
 
				+    RCCE_recv((char *)team_member, team_size*sizeof(int), team_lead);
			
 
				+  }
			
 
				+ 
			
 
				+  /* we assign the UE with the highest rank the role of master. We know that
			
 
				+     this UE is either in a power domain by itself, or there is another UE  
			
 
				+     in the same power domain who is the power domain master, because the
			
 
				+     power domain master is always the UE in that domain with the lowest rank   */
			
 
				+  master = wq_pars->master = NP-1;
			
 
				+ 
			
 
				+/* the team containing the overall master must remove it from its member list   */
			
 
				+  if (team_member[team_size-1] == master) wq_pars->team_size = --team_size;
			
 
				+ 
			
 
				+  /* the overall master is not in any team                                      */
			
 
				+  if (ID==master) team_size = wq_pars->team_size = 0;
			
 
				+ 
			
 
				+/* each UE determines its rank within the team                                  */
			
 
				+  local_rank = wq_pars->local_rank = 0;
			
 
				+  for (ue=0; ue<team_size; ue++) if (ID==team_member[ue]) 
			
 
				+    local_rank = wq_pars->local_rank = ue;
			
 
				+ 
			
 
				+/* this code determines number of power domain leads, plus list of UEs          */
			
 
				+  if (ID == master) {
			
 
				+    wq_pars->master_number = 0;
			
 
				+    for (int ue=0; ue<RCCE_num_ues()-1; ue++) {
			
 
				+      /* ask each core whether it is a team lead or not                         */
			
 
				+      RCCE_recv((char *)(&isleader), sizeof(int), ue);
			
 
				+      if (isleader) {
			
 
				+        master_list[wq_pars->master_number] = ue;
			
 
				+        (wq_pars->master_number)++;
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+  else {
			
 
				+    /* all cores let the master know their team lead status                     */
			
 
				+    isleader = (ID == team_lead);
			
 
				+    RCCE_send((char *)(&isleader), sizeof(int), master);
			
 
				+  }
			
 
				+ 
			
 
				+/* all UEs report their team size and memberships                               */
			
 
				+//  for (ue=0; ue<NP; ue++) {
			
 
				+//    RCCE_barrier(&RCCE_COMM_WORLD);
			
 
				+//    if (ID==ue) {
			
 
				+//      printf("UE %d (%d) is in a team with %d members: ", ID, 
			
 
				+//             local_rank, team_size);
			
 
				+//      for (mem=0; mem<team_size; mem++) printf("%d ", team_member[mem]);
			
 
				+//      printf("\n");
			
 
				+//    }
			
 
				+//  }
			
 
				+  return (RCCE_SUCCESS);
			
 
				+}
			
 
				+ 
			
 
				+int RCCE_queue_master_loop(void *work_item, QUEUE_PARMS *wq_pars){
			
 
				+ 
			
 
				+  int ue, ignore, test, count;
			
 
				+ 
			
 
				+  int size = RCCE_WI_size(work_item);
			
 
				+  void *address = RCCE_WI_address(work_item);
			
 
				+  count = 0; 
			
 
				+ 
			
 
				+  if (RCCE_WI_valid(work_item)) {
			
 
				+ 
			
 
				+    /* service work requests from any UE; first come, first served                */
			
 
				+    for (ue=0; ue<wq_pars->master_number; ue++) {
			
 
				+      RCCE_recv_test((char *)(&ignore), sizeof(int), wq_pars->master_list[ue], &test);
			
 
				+      if (test) {
			
 
				+//        printf("Master sends work to UE %d\n", wq_pars->master_list[ue]);
			
 
				+        RCCE_send((char *)address, size, wq_pars->master_list[ue]);
			
 
				+        count++;
			
 
				+        /* generate the next work item                                            */
			
 
				+        RCCE_new_work_item(work_item, wq_pars);
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+  else {
			
 
				+    /*  this loop ends all teams, so must insist each team checks in              */
			
 
				+    for (ue=0; ue<wq_pars->master_number; ue++) {
			
 
				+      RCCE_recv((char *)(&ignore), sizeof(int), wq_pars->master_list[ue]);
			
 
				+//      printf("Master sends end of work message to UE %d\n", ue);
			
 
				+      RCCE_send((char *)address, size,  wq_pars->master_list[ue]);
			
 
				+    }
			
 
				+  }
			
 
				+    
			
 
				+  return(count);
			
 
				+}
			
 
				+ 
			
 
				+int RCCE_queue_member_loop(void *work_item, QUEUE_PARMS *wq_pars) {
			
 
				+ 
			
 
				+  int gimme_work, mem;
			
 
				+  int size = RCCE_WI_size(work_item);
			
 
				+  void *address = RCCE_WI_address(work_item);
			
 
				+ 
			
 
				+  /* ask for work if I am a team lead                                          */
			
 
				+  if (wq_pars->ID == wq_pars->team_lead) {
			
 
				+    RCCE_send((char *)(&gimme_work), sizeof(int), wq_pars->master);
			
 
				+    RCCE_recv((char *)address, size, wq_pars->master);
			
 
				+    /* team leads parcel out the work to the workers */
			
 
				+    for (mem=1; mem<(wq_pars->team_size); mem++) {
			
 
				+        printf("Team lead %d sends work to UE %d\n", RCCE_ue(), wq_pars->team_member[mem]);
			
 
				+        fflush(0);
			
 
				+      RCCE_send((char *)address, size, wq_pars->team_member[mem]);
			
 
				+    }
			
 
				+  }
			
 
				+  else {
			
 
				+    RCCE_recv((char *)address, size, wq_pars->team_lead);
			
 
				+  }
			
 
				+  if (RCCE_WI_valid(work_item)) {
			
 
				+    RCCE_execute_work_item(work_item, wq_pars);
			
 
				+//    printf("UE %d executed work item\n", wq_pars->ID);
			
 
				+  }
			
 
				+  else {
			
 
				+//    printf("UE %d received stop queue task\n", RCCE_ue());
			
 
				+    return(1);
			
 
				+  }
			
 
				+  return(RCCE_SUCCESS);
			
 
				+}
			
--- a/RCCE_V2.0/apps/ECOQ/.svn/text-base/README.svn-base
+++ b/RCCE_V2.0/apps/ECOQ/.svn/text-base/README.svn-base
@@ -0,0 +1,2 @@
 
				+Please note that ECOQ is still under development and may be unstable.
			
 
				+
			
--- a/RCCE_V2.0/apps/ECOQ/Makefile
+++ b/RCCE_V2.0/apps/ECOQ/Makefile
@@ -0,0 +1,24 @@
 
				+SHELL=sh
			
 
				+
			
 
				+RCCEROOT=../..
			
 
				+include $(RCCEROOT)/common/symbols
			
 
				+
			
 
				+ECOQOBJS=RCCE_eco_q.o RCCE_pwr_wq_framework.o $(ARCHIVE)
			
 
				+
			
 
				+default:
			
 
				+	@echo Usage: make PWRMGMT=1 eco_q [clean]
			
 
				+
			
 
				+eco_q: $(ECOQOBJS)
			
 
				+	$(CCOMPILE) -o eco_q $(ECOQOBJS) $(CFLAGS)
			
 
				+
			
 
				+RCCE_eco_q.o: RCCE_eco_q.c $(RCCEINCLUDE)/RCCE.h RCCE_pwr_wq.h 
			
 
				+	$(CCOMPILE) -c $(CFLAGS) RCCE_eco_q.c  
			
 
				+
			
 
				+RCCE_pwr_wq_framework.o: RCCE_pwr_wq_framework.c $(RCCEINCLUDE)/RCCE.h RCCE_pwr_wq.h
			
 
				+	$(CCOMPILE) -c $(CFLAGS) RCCE_pwr_wq_framework.c  
			
 
				+
			
 
				+clean:
			
 
				+	@ rm -f	*.o wq eco_q FV_reset
			
 
				+
			
 
				+
			
 
				+
			
--- a/RCCE_V2.0/apps/ECOQ/RCCE_eco_q.c
+++ b/RCCE_V2.0/apps/ECOQ/RCCE_eco_q.c
@@ -0,0 +1,332 @@
 
				+/* this synthetic application assumes a three-dimensional
			
 
				+   domain of  nx*ny*nz points that is decomposed into chunks
			
 
				+   of different size, and that require different amounts
			
 
				+   of computational work.
			
 
				+*/
			
 
				+ 
			
 
				+#include "RCCE.h"
			
 
				+#include "RCCE_pwr_wq.h"
			
 
				+#include <stdio.h>
			
 
				+ 
			
 
				+#define min(x,y) ( (x) < (y) ? (x) : (y) )
			
 
				+#define max(x,y) ( (x) > (y) ? (x) : (y) )
			
 
				+ 
			
 
				+int power_change = 1;
			
 
				+int BASE_F = 5; /* baseline clock divider (320 MHz) */
			
 
				+int HIGH_F = 3; /* high CPU clock divider (533 MHz) */
			
 
				+void read_and_prep_data(int, int, int, int, double*);
			
 
				+void do_work(int, int, int, int, int, int, int, 
			
 
				+             double*, double*, double*, double*, RCCE_REQUEST*);
			
 
				+ 
			
 
				+#define NX     200
			
 
				+#define NY     200
			
 
				+#define NZ     100
			
 
				+#define NCOMP  5
			
 
				+#define NITER  10
			
 
				+#define XZONEJAGS 4
			
 
				+#define YZONEJAGS 4
			
 
				+#define STEP 3
			
 
				+ 
			
 
				+typedef struct {
			
 
				+  struct {
			
 
				+    int seq_number;
			
 
				+  } dynamic_part;
			
 
				+  int npx;
			
 
				+  int npy;
			
 
				+  int kstart;
			
 
				+  int kend;
			
 
				+  int kwidth;
			
 
				+  int left;
			
 
				+  int right;
			
 
				+  int *isize;
			
 
				+  int *jsize;
			
 
				+  int *ksize;
			
 
				+  RCCE_REQUEST *request;
			
 
				+} WORK_ITEM;
			
 
				+ 
			
 
				+int RCCE_WI_size(void *work_item) {
			
 
				+  return(sizeof(((WORK_ITEM *)work_item)->dynamic_part));
			
 
				+}
			
 
				+ 
			
 
				+int RCCE_WI_valid(void *work_item) {
			
 
				+  return(((WORK_ITEM *)work_item)->dynamic_part.seq_number>=0);
			
 
				+}
			
 
				+ 
			
 
				+void *RCCE_WI_address(void *work_item) {
			
 
				+  return((void *)(&(((WORK_ITEM *)work_item)->dynamic_part)));
			
 
				+}
			
 
				+int RCCE_APP(int argc, char **argv){
			
 
				+ 
			
 
				+  int       *isize, *jsize, *ksize; 
			
 
				+  int       ID, NP;
			
 
				+  int       npx, npy, ix, iy, kstart, kend, kwidth, nrounds;
			
 
				+  int       i, j, k, mem, ue, iter, fdiv, vlevel;
			
 
				+  int       *team_member, team_size, team_lead, size, local_rank, 
			
 
				+      left, right, master, master_number, *master_list;
			
 
				+  QUEUE_PARMS wq_pars;
			
 
				+  WORK_ITEM work_item;
			
 
				+  RCCE_REQUEST request;
			
 
				+ 
			
 
				+  RCCE_init(&argc, &argv);
			
 
				+  NP = wq_pars.NP = RCCE_num_ues();
			
 
				+  ID = wq_pars.ID = RCCE_ue();
			
 
				+ 
			
 
				+  if (argc < 4) {
			
 
				+    if (ID==0) printf("Error: Need two parameters, x & y tiles, plus # rounds\n");
			
 
				+    return(1);
			
 
				+  }
			
 
				+ 
			
 
				+/* read the number of subdomains (x & y-direction) from the command line        */
			
 
				+  npx = work_item.npx = atoi(*++argv);
			
 
				+  npy = work_item.npy =  atoi(*++argv);
			
 
				+ 
			
 
				+/* test validity of the requested tiling; each tile must be large enough to
			
 
				+   divide the z-dimension amoung the members of the team                       */
			
 
				+  if (npx <= 0 || npy <= 0 || npx > NX || npy > NY) {
			
 
				+    if (ID==0) printf("Illegal tiling: %d, %d\n", npx, npy);
			
 
				+    RCCE_finalize();
			
 
				+    return(1);
			
 
				+  }
			
 
				+  nrounds = atoi(*++argv);
			
 
				+  if (nrounds <= 0) {power_change=0; nrounds = -nrounds;}
			
 
				+  
			
 
				+  RCCE_debug_set(RCCE_DEBUG_ALL);
			
 
				+  /* lower power req until we need it                                           */
			
 
				+  if (power_change) RCCE_iset_power(BASE_F, &request, &fdiv, &vlevel);
			
 
				+ 
			
 
				+  /* form teams; copy results to local variables                                */
			
 
				+  RCCE_setup_work_queue_teams(&wq_pars); 
			
 
				+  master      = wq_pars.master;
			
 
				+  team_lead   = wq_pars.team_lead;
			
 
				+  local_rank  = wq_pars.local_rank;
			
 
				+  team_size   = wq_pars.team_size;
			
 
				+  team_member = wq_pars.team_member;
			
 
				+  master_list = wq_pars.master_list;  
			
 
				+ 
			
 
				+  if (team_size > NZ) {
			
 
				+    if (ID==0) printf("Error: NZ too small: %d\n", NZ);
			
 
				+    RCCE_finalize();
			
 
				+    return(1);
			
 
				+  }
			
 
				+ 
			
 
				+  /* define left and right neighbors                                            */
			
 
				+  if (local_rank>0)           work_item.left  = team_member[local_rank-1];
			
 
				+  else                        work_item.left  = -1;
			
 
				+  if (local_rank<team_size-1) work_item.right = team_member[local_rank+1];
			
 
				+  else                        work_item.right = -1;
			
 
				+ 
			
 
				+  if (ID != master) {
			
 
				+    /* allocate space for the sizes of the subdomains                           */
			
 
				+    isize = (int *) malloc(sizeof(int)*npx);
			
 
				+    jsize = (int *) malloc(sizeof(int)*npy);  
			
 
				+    ksize = (int *) malloc(sizeof(int)*team_size);
			
 
				+    if (!isize || !jsize || !ksize) {
			
 
				+      printf("Could not allocate space for tile sizes\n");
			
 
				+      return(1);
			
 
				+    }
			
 
				+ 
			
 
				+    for (k=0; k<team_size; k++) {
			
 
				+      ksize[k] = NZ/team_size;
			
 
				+      /* adjust for any leftover points                                         */
			
 
				+      if (k<(NZ%team_size)) ksize[k]++;
			
 
				+    }
			
 
				+    for (kstart=0, k=0; k<local_rank; k++) kstart += ksize[k];
			
 
				+    kend = kstart + ksize[local_rank] -1;
			
 
				+    kwidth = work_item.kwidth = ksize[local_rank]+2;
			
 
				+    work_item.kstart = kstart;
			
 
				+    work_item.kend   = kend;
			
 
				+ 
			
 
				+    /* introduce load imbalance among subdomains by perturbing their sizes      */
			
 
				+    for (i=0; i<npx-1; i++) isize[i] = NX/npx;
			
 
				+    isize[npx-1] = NX-(NX/npx)*(npx-1);
			
 
				+    for (iter=0; iter<XZONEJAGS; iter++) 
			
 
				+    for (i=1; i<npx; i+=2) if (isize[i-1] > i) {
			
 
				+      isize[i-1] -= i;
			
 
				+      isize[i]   += i;
			
 
				+    }
			
 
				+    for (j=0; j<npy-1; j++) jsize[j] = NY/npy;
			
 
				+    jsize[npy-1] = NY-(NY/npy)*(npy-1);
			
 
				+    for (iter=0; iter<YZONEJAGS; iter++) 
			
 
				+    for (j=1; j<npy; j+=2) if (jsize[j-1] > j) {
			
 
				+      jsize[j-1] -= j;
			
 
				+      jsize[j]   += j;
			
 
				+    }
			
 
				+  }
			
 
				+ 
			
 
				+  work_item.dynamic_part.seq_number = 0;
			
 
				+  work_item.request = &request;
			
 
				+  work_item.isize = isize;
			
 
				+  work_item.jsize = jsize;
			
 
				+  work_item.ksize = ksize;
			
 
				+ 
			
 
				+  WORK_ITEM *wi = &work_item;
			
 
				+ 
			
 
				+/* master goes into a loop, servicing work requests                             */
			
 
				+  if (ID==master) {
			
 
				+    int tasks_completed = 0;
			
 
				+    while (tasks_completed<nrounds) {
			
 
				+      tasks_completed += RCCE_queue_master_loop((void *)&work_item, &wq_pars);
			
 
				+    }
			
 
				+    /* master creates one more work loop to end all teams                       */
			
 
				+    work_item.dynamic_part.seq_number = -1;
			
 
				+    RCCE_queue_master_loop((void *)&work_item, &wq_pars);
			
 
				+  }
			
 
				+ 
			
 
				+/* teams go into an endless loop, executing tasks and asking for new 
			
 
				+   ones when they are done                                                      */
			
 
				+ 
			
 
				+  else {
			
 
				+    int error = 0;
			
 
				+    while (!error) {
			
 
				+      error=RCCE_queue_member_loop((void *)(&work_item), &wq_pars);
			
 
				+    }
			
 
				+  }
			
 
				+  
			
 
				+  RCCE_finalize();
			
 
				+  return (0);
			
 
				+}
			
 
				+ 
			
 
				+/* Execute one work item on the calling UE.
				+ *
				+ * work_item is interpreted as a WORK_ITEM.  Its sequence number selects a
				+ * subdomain: ix = seq_number % npx and iy = seq_number / npx.  One heap buffer
				+ * of 4*words doubles is allocated and split into four equal parts (the data
				+ * frame followed by the x, y and z flux arrays); it is filled by
				+ * read_and_prep_data(), processed by do_work() and released before returning.
				+ * If power_change is nonzero, RCCE_wait_power() and then
				+ * RCCE_iset_power(HIGH_F, ...) are called on the item's request before the
				+ * compute phase.
				+ *
				+ * Returns 0 on success, 1 if the buffer cannot be allocated.                   */
				+int RCCE_execute_work_item(void *work_item, QUEUE_PARMS *wq_pars) {
				+
				+  int ix, iy, words, fdiv, vlevel;
				+  double *data_frame, *flux_x, *flux_y, *flux_z;
				+  WORK_ITEM *wi = (WORK_ITEM *)work_item;
				+
				+  ix = (wi->dynamic_part.seq_number)%(wi->npx);
				+  iy = (wi->dynamic_part.seq_number)/(wi->npx);
				+  words = wi->isize[ix]*wi->jsize[iy]*(wi->kwidth)*NCOMP;
				+
				+  /* compute the byte count in size_t and refuse sizes that cannot be
				+     represented; the former 4*words was evaluated as an int and could
				+     overflow before being widened by sizeof                                  */
				+  data_frame = NULL;
				+  if (words >= 0 && (size_t)words <= ((size_t)-1)/(4*sizeof(double)))
				+    data_frame = (double *) malloc(4*(size_t)words*sizeof(double));
				+  if (!data_frame) {
				+    printf("Could not allocate %d words on UE %d\n", words, RCCE_ue());
				+    return(1);
				+  }
				+
				+  /* the three flux arrays follow the data frame inside the same buffer        */
				+  flux_x = data_frame + 1*words;
				+  flux_y = data_frame + 2*words;
				+  flux_z = data_frame + 3*words;
				+  read_and_prep_data(wi->isize[ix], wi->jsize[iy], wi->kstart, wi->kend, data_frame);
				+
				+  /* entering a high-cpu-intensity segment of the code                         */
				+  if (power_change) RCCE_wait_power(wi->request);
				+  if (power_change) RCCE_iset_power(HIGH_F, wi->request, &fdiv, &vlevel);
				+  do_work(wi->isize[ix], wi->jsize[iy], wi->kstart, wi->kend, wi->left, wi->right, 
				+          wq_pars->local_rank, data_frame, flux_x, flux_y, flux_z, wi->request);
				+  free(data_frame);
				+  return(0);
				+}
			
 
				+ 
			
 
				+ 
			
 
				+/* Element (c,i,j,k) of the data frame.  k is a global plane index; plane
				+   kstart-1 maps to offset 0.  data_frame, in, jn and kstart are taken from
				+   the scope in which the macro is used.  Every argument is parenthesized so
				+   that expressions such as a conditional can be passed for k.                 */
				+#define FR(c,i,j,k) data_frame[(c)+NCOMP*((i)+in*((j)+((k)-kstart+1)*jn))]
			
 
				+ 
			
 
				+/* Fill planes kstart..kend of the data frame with synthetic input.
				+ *
				+ * First pass: components 0..4 of every point get smooth values that depend
				+ * on (i,j,k).  Second pass: all NCOMP components of a point are lowered by
				+ * 1.0 where i is even and raised by 1.0 where i is odd.  Planes outside
				+ * kstart..kend are not written.                                               */
				+void read_and_prep_data(int in, int jn, int kstart, int kend, double *data_frame) {
				+  int i, j, k, c;
				+  double jag;
				+
				+  /* pass 1: smooth data                                                       */
				+  for (k=kstart; k<=kend; k++) {
				+    for (j=0; j<jn; j++) {
				+      for (i=0; i<in; i++) {
				+        FR(0,i,j,k) = 1.0;
				+        FR(1,i,j,k) = (double)(k-j)+10.0;
				+        FR(2,i,j,k) = (double)(i-k)+20.0;
				+        FR(3,i,j,k) = (double)(j-i)+30.0;
				+        FR(4,i,j,k) = 100.0;
				+      }
				+    }
				+  }
				+
				+  /* pass 2: jaggedness; the sign of the offset alternates with i only         */
				+  for (k=kstart; k<=kend; k++) {
				+    for (j=0; j<jn; j++) {
				+      for (i=0; i<in; i++) {
				+        jag = (i%2) ? 1.0 : -1.0;
				+        for (c=0; c<NCOMP; c++) FR(c,i,j,k) += jag;
				+      }
				+    }
				+  }
				+}
			
 
				+ 
			
 
				+/* Flux arrays share the (c,i,j,k) layout of FR: plane kstart-1 maps to offset
				+   0, and in, jn and kstart come from the scope in which the macro is used.
				+   Every argument is parenthesized in the expansion.                           */
				+#define FLUX_X(c,i,j,k) flux_x[(c)+NCOMP*((i)+in*((j)+((k)-kstart+1)*jn))]
				+#define FLUX_Y(c,i,j,k) flux_y[(c)+NCOMP*((i)+in*((j)+((k)-kstart+1)*jn))]
				+#define FLUX_Z(c,i,j,k) flux_z[(c)+NCOMP*((i)+in*((j)+((k)-kstart+1)*jn))]
			
 
				+ 
			
 
				+/* Advance the local slab of the data frame by NITER sweeps.
				+ *
				+ *   in, jn         extent of the tile in i and j
				+ *   kstart, kend   first and last k plane owned by this UE
				+ *   left, right    UE ids of the neighbors in k, or -1 if there is none
				+ *   rank           local rank; its parity orders the sends and receives
				+ *   data_frame     field values, addressed through FR()
				+ *   flux_x/y/z     scratch arrays, addressed through FLUX_X/Y/Z()
				+ *   request        handle passed to RCCE_wait_power()/RCCE_iset_power()
				+ *
				+ * Every sweep first exchanges boundary planes: plane kend is sent to right
				+ * and arrives there as plane kstart-1, plane kstart is sent to left and
				+ * arrives there as plane kend+1.  Then each point with 1<=i<in-1, 1<=j<jn-1
				+ * and max(kstart,1)<=k<=min(NZ-2,kend) is updated in place from its 3x3x3
				+ * neighborhood.
				+ *
				+ * When power_change is set, RCCE_wait_power() is called at sweep 2 and a
				+ * switch to BASE_F is requested at sweep NITER-2.                              */
				+void do_work(int in, int jn, int kstart, int kend, int left, int right, int rank,
				+             double *data_frame, double *flux_x, double *flux_y, double *flux_z,
				+             RCCE_REQUEST *request) {
				+
				+  int i, j, k, c, iter, phase, fdiv, vlevel;
				+  double vx = 1.0, vy = 1.0, vz = 1.0;
				+  double dt = 0.0001;
				+
				+  for (iter=0; iter<NITER; iter++) {
				+
				+    if (iter==2 && power_change) {
				+      RCCE_wait_power(request);
				+    }
				+    /* logical AND: the former bitwise '&' only worked for power_change==1     */
				+    if (iter==NITER-2 && power_change) {
				+      RCCE_iset_power(BASE_F, request, &fdiv, &vlevel);
				+    }
				+    /* before each iteration we need to fill ghost points with neighbor data.
				+       Even ranks send in phase 0 and receive in phase 1, odd ranks do the
				+       opposite, so a send always meets a matching receive                     */
				+    for (phase=0; phase<2; phase++) {
				+      if (right != -1 && (rank+phase+1)%2) {
				+         RCCE_send((char *)(&FR(0,0,0,kend)),in*jn*NCOMP*sizeof(double), right);
				+      }
				+      if (left  != -1 && (rank+phase)%2) {
				+         RCCE_recv((char *)(&FR(0,0,0,kstart-1)),in*jn*NCOMP*sizeof(double), left);
				+      }
				+    }
				+    for (phase=0; phase<2; phase++) {
				+      if (left != -1 && (rank+phase+1)%2)
				+         RCCE_send((char *)(&FR(0,0,0,kstart)),in*jn*NCOMP*sizeof(double), left);
				+      if (right  != -1 && (rank+phase)%2) 
				+         RCCE_recv((char *)(&FR(0,0,0,kend+1)),in*jn*NCOMP*sizeof(double), right);
				+    }
				+    for (k=max(kstart,1); k<=min(NZ-2,kend); k++) for (j=1; j<jn-1; j++) 
				+    for (i=1; i<in-1; i++) 
				+    for (c=0; c<NCOMP; c++){
				+      /* differences along i                                                   */
				+      FLUX_X(c,i,j,k) = 
				+        (3.0*FR(c,i+1,j+1,k  ) - 4.0*FR(c,i,j+1,k  ) + FR(c,i-1,j+1,k  ))/16.0 +
				+        (3.0*FR(c,i+1,j  ,k+1) - 4.0*FR(c,i,j,  k+1) + FR(c,i-1,j,  k+1))/16.0 +
				+        (3.0*FR(c,i+1,j+1,k+1) - 4.0*FR(c,i,j+1,k+1) + FR(c,i-1,j+1,k+1))/32.0 +
				+        (3.0*FR(c,i+1,j-1,k  ) - 4.0*FR(c,i,j-1,k  ) + FR(c,i-1,j-1,k  ))/16.0 +
				+        (3.0*FR(c,i+1,j  ,k-1) - 4.0*FR(c,i,j,  k-1) + FR(c,i-1,j,  k-1))/16.0 +
				+        (3.0*FR(c,i+1,j-1,k-1) - 4.0*FR(c,i,j-1,k-1) + FR(c,i-1,j-1,k-1))/32.0 +
				+        (3.0*FR(c,i+1,j-1,k+1) - 4.0*FR(c,i,j-1,k+1) + FR(c,i-1,j-1,k+1))/32.0 +
				+        (3.0*FR(c,i+1,j+1,k-1) - 4.0*FR(c,i,j+1,k-1) + FR(c,i-1,j+1,k-1))/32.0 +
				+        (3.0*FR(c,i+1,j  ,k  ) - 4.0*FR(c,i,j,  k  ) + FR(c,i-1,j,  k  ))/8.0;
				+  
				+      /* differences along j                                                   */
				+      FLUX_Y(c,i,j,k) = 
				+        (3.0*FR(c,i+1,j+1,k  ) - 4.0*FR(c,i+1,j,k  ) + FR(c,i+1,j-1,k  ))/16.0 +
				+        (3.0*FR(c,i  ,j+1,k+1) - 4.0*FR(c,i  ,j,k+1) + FR(c,i  ,j-1,k+1))/16.0 +
				+        (3.0*FR(c,i+1,j+1,k+1) - 4.0*FR(c,i+1,j,k+1) + FR(c,i+1,j-1,k+1))/32.0 +
				+        (3.0*FR(c,i-1,j+1,k  ) - 4.0*FR(c,i-1,j,k  ) + FR(c,i-1,j-1,k  ))/16.0 +
				+        (3.0*FR(c,i  ,j+1,k-1) - 4.0*FR(c,i  ,j,k-1) + FR(c,i  ,j-1,k-1))/16.0 +
				+        (3.0*FR(c,i-1,j+1,k-1) - 4.0*FR(c,i-1,j,k-1) + FR(c,i-1,j-1,k-1))/32.0 +
				+        (3.0*FR(c,i-1,j+1,k+1) - 4.0*FR(c,i-1,j,k+1) + FR(c,i-1,j-1,k+1))/32.0 +
				+        (3.0*FR(c,i+1,j+1,k-1) - 4.0*FR(c,i+1,j,k-1) + FR(c,i+1,j-1,k-1))/32.0 +
				+        (3.0*FR(c,i  ,j+1,k  ) - 4.0*FR(c,i  ,j,k  ) + FR(c,i  ,j-1,k  ))/8.0;
				+  
				+      /* differences along k; this result was formerly stored in FLUX_Y a
				+         second time, which discarded the j flux and left FLUX_Z unset         */
				+      FLUX_Z(c,i,j,k) = 
				+        (3.0*FR(c,i+1,j  ,k+1) - 4.0*FR(c,i+1,j  ,k) + FR(c,i+1,j  ,k-1))/16.0 +
				+        (3.0*FR(c,i  ,j+1,k+1) - 4.0*FR(c,i  ,j+1,k) + FR(c,i  ,j+1,k-1))/16.0 +
				+        (3.0*FR(c,i+1,j+1,k+1) - 4.0*FR(c,i+1,j+1,k) + FR(c,i+1,j+1,k-1))/32.0 +
				+        (3.0*FR(c,i-1,j  ,k+1) - 4.0*FR(c,i-1,j  ,k) + FR(c,i-1,j  ,k-1))/16.0 +
				+        (3.0*FR(c,i  ,j-1,k+1) - 4.0*FR(c,i  ,j-1,k) + FR(c,i  ,j-1,k-1))/16.0 +
				+        (3.0*FR(c,i-1,j-1,k+1) - 4.0*FR(c,i-1,j-1,k) + FR(c,i-1,j-1,k-1))/32.0 +
				+        (3.0*FR(c,i-1,j+1,k+1) - 4.0*FR(c,i-1,j+1,k) + FR(c,i-1,j+1,k-1))/32.0 +
				+        (3.0*FR(c,i+1,j-1,k+1) - 4.0*FR(c,i+1,j-1,k) + FR(c,i+1,j-1,k-1))/32.0 +
				+        (3.0*FR(c,i  ,j  ,k+1) - 4.0*FR(c,i  ,j  ,k) + FR(c,i  ,j  ,k-1))/8.0;
				+  
				+      /* in-place update: points visited earlier in this sweep already hold
				+         their new values when they are read as neighbors                      */
				+      FR(c,i,j,k) += dt*(
				+         -1.0*(vx*FLUX_X(c,i,j,k) + vy*FLUX_Y(c,i,j,k) + vz*FLUX_Z(c,i,j,k)) + 
				+              FR(c,i+1,j,k) -2.0*FR(c,i,j,k) + FR(c,i-1,j,k) +
				+              FR(c,i,j+1,k) -2.0*FR(c,i,j,k) + FR(c,i,j-1,k) +
				+              FR(c,i,j,k+1) -2.0*FR(c,i,j,k) + FR(c,i,j,k-1));
				+    }
				+  }
				+  return;
				+}
			
 
				+ 
			
 
				+/* Step the work item to the next subdomain: the sequence number is
				+   incremented and wraps to 0 after npx*npy items.  wq_pars is not used.
				+   Returns RCCE_SUCCESS.                                                       */
				+int RCCE_new_work_item(void *work_item, QUEUE_PARMS *wq_pars) {
				+  WORK_ITEM *item = (WORK_ITEM *)work_item;
				+
				+  item->dynamic_part.seq_number =
				+      (1+item->dynamic_part.seq_number)%(item->npx*item->npy);
				+  return(RCCE_SUCCESS);
				+}
			
--- a/RCCE_V2.0/apps/ECOQ/RCCE_pwr_wq.h
+++ b/RCCE_V2.0/apps/ECOQ/RCCE_pwr_wq.h
@@ -0,0 +1,23 @@
 
				+#ifndef RCCE_PWR_WQ_H
				+#define RCCE_PWR_WQ_H
				+
				+/* Per-UE description of the work-queue teams, filled in by
				+   RCCE_setup_work_queue_teams().                                              */
				+typedef struct {
				+  int NP;                      /* number of UEs, from RCCE_num_ues()           */
				+  int ID;                      /* rank of this UE, from RCCE_ue()              */
				+  int master;                  /* UE that hands out work items (set to NP-1)   */
				+  int team_lead;               /* from RCCE_power_domain_master()              */
				+  int local_rank;              /* index of this UE in team_member              */
				+  int team_size;               /* number of valid entries in team_member       */
				+  int team_member[RCCE_MAXNP]; /* sorted UE ids of this UE's team              */
				+  int master_list[RCCE_MAXNP]; /* team leads; only filled in on the master     */
				+  int master_number;           /* number of valid entries in master_list       */
				+} QUEUE_PARMS;
				+
				+/* supplied by the application: process one work item                          */
				+int RCCE_execute_work_item(void *work_item, QUEUE_PARMS *wq_pars);
				+/* collective team setup; fills *wq_pars                                       */
				+int RCCE_setup_work_queue_teams(QUEUE_PARMS *wq_pars);
				+/* master side of the queue; returns the number of items handed out            */
				+int RCCE_queue_master_loop(void *work_item, QUEUE_PARMS *wq_pars);
				+/* supplied by the application: turn work_item into the next item              */
				+int RCCE_new_work_item(void *work_item, QUEUE_PARMS *wq_pars);
				+/* team side of the queue; returns nonzero once the stop item was received     */
				+int RCCE_queue_member_loop(void *work_item, QUEUE_PARMS *wq_pars);
				+/* size in bytes and start address of the part of a work item that is sent     */
				+int RCCE_WI_size(void *work_item);
				+void *RCCE_WI_address(void *work_item);
				+
				+#ifdef OPENMP_
				+#pragma omp threadprivate(power_change)
				+#endif
				+
				+#endif /* RCCE_PWR_WQ_H */
			
--- a/RCCE_V2.0/apps/ECOQ/RCCE_pwr_wq_framework.c
+++ b/RCCE_V2.0/apps/ECOQ/RCCE_pwr_wq_framework.c
@@ -0,0 +1,156 @@
 
				+#include "RCCE.h"
			
 
				+#include "RCCE_pwr_wq.h"
			
 
				+#include <stdio.h>
			
 
				+ 
			
 
				+int RCCE_WI_valid(void *);
			
 
				+int  RCCE_qsort(char *, size_t, size_t, int (*)(const void*, const void*));
			
 
				+/* comparison function used in routine to sort core IDs                  */
			
 
				+int id_compare(const void *e1, const void *e2);
			
 
				+ 
			
 
				+/* Collective setup of the work-queue teams; called by every UE.
				+ *
				+ * Fills *wq_pars as follows:
				+ *   - NP and ID from RCCE_num_ues() and RCCE_ue();
				+ *   - team_size and team_lead from RCCE_power_domain_size() and
				+ *     RCCE_power_domain_master();
				+ *   - team_member: the team lead collects the ids of its members, sorts the
				+ *     list with RCCE_qsort()/id_compare and sends it to every member;
				+ *   - master = NP-1.  A team whose last list entry is the master shrinks by
				+ *     one, and the master itself gets team_size 0;
				+ *   - local_rank: index of ID in team_member (0 if it is not found);
				+ *   - master_number and master_list, on the master only: the UEs that
				+ *     reported being a team lead.
				+ *
				+ * Returns RCCE_SUCCESS.                                                        */
				+int RCCE_setup_work_queue_teams(QUEUE_PARMS *wq_pars){ 
				+
				+  int NP, ID, ue, size, master, team_lead, team_size;
				+  int test, isleader;
				+  int *team_member, *master_list;
				+
				+  NP = wq_pars->NP = RCCE_num_ues();
				+  ID = wq_pars->ID = RCCE_ue();
				+  team_member = wq_pars->team_member;
				+  master_list = wq_pars->master_list;
				+
				+/* determine the number of UEs in the local power domain and form teams         */
				+  wq_pars->team_size = team_size = RCCE_power_domain_size();
				+  wq_pars->team_lead = team_lead = RCCE_power_domain_master();
				+  if (team_lead == ID) {
				+    /* the team lead is the first team member                                   */
				+    team_member[0] = team_lead;
				+    size = 1;
				+    /* the team lead collects IDs from its team members by polling all other
				+       UEs.  Polling stops as soon as the list is complete, so no slot at or
				+       beyond team_size is ever handed to RCCE_recv_test                        */
				+    while (size<team_size) {
				+      for (ue=0; ue<NP && size<team_size; ue++) if (ue != team_lead) {
				+        RCCE_recv_test((char *)(&(team_member[size])), sizeof(int), ue, &test);
				+        if (test) team_member[size++] = ue;
				+      }
				+    }
				+    /* ... and sends the list to all other team members, after sorting it       */
				+    RCCE_qsort((char *)team_member, team_size, sizeof(int), id_compare);
				+    for (ue=1; ue<team_size; ue++) 
				+      RCCE_send((char *)team_member, team_size*sizeof(int), team_member[ue]);
				+  }
				+  else {
				+    /* team members check in with the team lead ...                             */
				+    RCCE_send((char *)(&ID), sizeof(int), team_lead);
				+    /* ... and receive the complete list of team members                        */
				+    RCCE_recv((char *)team_member, team_size*sizeof(int), team_lead);
				+  }
				+
				+  /* we assign the UE with the highest rank the role of master. We know that
				+     this UE is either in a power domain by itself, or there is another UE  
				+     in the same power domain who is the power domain master, because the
				+     power domain master is always the UE in that domain with the lowest rank   */
				+  master = wq_pars->master = NP-1;
				+
				+/* the team containing the overall master must remove it from its member list   */
				+  if (team_member[team_size-1] == master) wq_pars->team_size = --team_size;
				+
				+  /* the overall master is not in any team                                      */
				+  if (ID==master) team_size = wq_pars->team_size = 0;
				+
				+/* each UE determines its rank within the team                                  */
				+  wq_pars->local_rank = 0;
				+  for (ue=0; ue<team_size; ue++) if (ID==team_member[ue]) 
				+    wq_pars->local_rank = ue;
				+
				+/* this code determines number of power domain leads, plus list of UEs          */
				+  if (ID == master) {
				+    wq_pars->master_number = 0;
				+    for (ue=0; ue<NP-1; ue++) {
				+      /* ask each core whether it is a team lead or not                         */
				+      RCCE_recv((char *)(&isleader), sizeof(int), ue);
				+      if (isleader) {
				+        master_list[wq_pars->master_number] = ue;
				+        (wq_pars->master_number)++;
				+      }
				+    }
				+  }
				+  else {
				+    /* all cores let the master know their team lead status                     */
				+    isleader = (ID == team_lead);
				+    RCCE_send((char *)(&isleader), sizeof(int), master);
				+  }
				+
				+  return (RCCE_SUCCESS);
				+}
			
 
				+ 
			
 
				+/* One pass of the master over all team leads.
				+ *
				+ * With a valid work item (RCCE_WI_valid), every team lead is polled once for
				+ * a pending request; each requester is sent the current item, after which
				+ * RCCE_new_work_item() prepares the next one.  With an item that is not
				+ * valid, the master waits for a request from every team lead in turn and
				+ * answers it with that item.
				+ *
				+ * Returns the number of valid items handed out (0 in the second case).        */
				+int RCCE_queue_master_loop(void *work_item, QUEUE_PARMS *wq_pars){
				+
				+  int lead, dest, token, pending;
				+  int nbytes    = RCCE_WI_size(work_item);
				+  void *payload = RCCE_WI_address(work_item);
				+  int served    = 0;
				+
				+  if (!RCCE_WI_valid(work_item)) {
				+    /* this pass ends all teams, so must insist each team checks in            */
				+    for (lead=0; lead<wq_pars->master_number; lead++) {
				+      dest = wq_pars->master_list[lead];
				+      RCCE_recv((char *)(&token), sizeof(int), dest);
				+      RCCE_send((char *)payload, nbytes, dest);
				+    }
				+    return(served);
				+  }
				+
				+  /* service work requests from the team leads; leads without a pending
				+     request are skipped                                                      */
				+  for (lead=0; lead<wq_pars->master_number; lead++) {
				+    dest = wq_pars->master_list[lead];
				+    RCCE_recv_test((char *)(&token), sizeof(int), dest, &pending);
				+    if (!pending) continue;
				+    RCCE_send((char *)payload, nbytes, dest);
				+    served++;
				+    /* generate the next work item                                             */
				+    RCCE_new_work_item(work_item, wq_pars);
				+  }
				+  return(served);
				+}
			
 
				+ 
			
 
				+/* One round of the team side of the work queue.
				+ *
				+ * A team lead sends a request word to the master, receives the next work
				+ * item and forwards it to team_member[1..team_size-1]; every other UE
				+ * receives the item from its team lead.  If the item is valid
				+ * (RCCE_WI_valid) it is executed.
				+ *
				+ * Returns RCCE_SUCCESS after a valid item was executed (the return value of
				+ * RCCE_execute_work_item() is not examined) and 1 when the item is not
				+ * valid.                                                                      */
				+int RCCE_queue_member_loop(void *work_item, QUEUE_PARMS *wq_pars) {
				+
				+  /* the request word is only a token, but it is initialized so that no
				+     indeterminate bytes are sent                                             */
				+  int gimme_work = 1, mem;
				+  int size = RCCE_WI_size(work_item);
				+  void *address = RCCE_WI_address(work_item);
				+
				+  /* ask for work if I am a team lead                                          */
				+  if (wq_pars->ID == wq_pars->team_lead) {
				+    RCCE_send((char *)(&gimme_work), sizeof(int), wq_pars->master);
				+    RCCE_recv((char *)address, size, wq_pars->master);
				+    /* team leads parcel out the work to the workers */
				+    for (mem=1; mem<(wq_pars->team_size); mem++) {
				+      printf("Team lead %d sends work to UE %d\n", RCCE_ue(), wq_pars->team_member[mem]);
				+      fflush(0);
				+      RCCE_send((char *)address, size, wq_pars->team_member[mem]);
				+    }
				+  }
				+  else {
				+    RCCE_recv((char *)address, size, wq_pars->team_lead);
				+  }
				+
				+  /* an item that is not valid is the stop message                             */
				+  if (!RCCE_WI_valid(work_item)) return(1);
				+
				+  RCCE_execute_work_item(work_item, wq_pars);
				+  return(RCCE_SUCCESS);
				+}
			
--- a/RCCE_V2.0/apps/ECOQ/README
+++ b/RCCE_V2.0/apps/ECOQ/README
@@ -0,0 +1,2 @@
 
				+Please note that ECOQ is still under development and may be unstable.
			
 
				+
			
--- a/RCCE_V2.0/apps/FLUSH/.svn/all-wcprops
+++ b/RCCE_V2.0/apps/FLUSH/.svn/all-wcprops
@@ -0,0 +1,17 @@
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 54
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/FLUSH
			
 
				+END
			
 
				+Makefile
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 63
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/FLUSH/Makefile
			
 
				+END
			
 
				+RCCE_test_cacheable.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 76
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/FLUSH/RCCE_test_cacheable.c
			
 
				+END
			
--- a/RCCE_V2.0/apps/FLUSH/.svn/entries
+++ b/RCCE_V2.0/apps/FLUSH/.svn/entries
@@ -0,0 +1,96 @@
 
				+10
			
 
				+
			
 
				+dir
			
 
				+313
			
 
				+http://marcbug.scc-dc.com/svn/repository/tags/RCCE_V2.0/apps/FLUSH
			
 
				+http://marcbug.scc-dc.com/svn/repository
			
 
				+
			
 
				+
			
 
				+
			
 
				+2011-04-11T21:00:28.037293Z
			
 
				+188
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+c924d837-3317-4ba4-8fbd-5f2da8699d51
			
 
				+
			
 
				+RCCE_test_cacheable.c
			
 
				+file
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-10-27T13:42:38.864598Z
			
 
				+86ae06d5fc3ecbb2cd5cdcd377528404
			
 
				+2011-02-23T19:51:16.745747Z
			
 
				+161
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2723
			
 
				+
			
 
				+Makefile
			
 
				+file
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-10-27T13:42:38.864598Z
			
 
				+983fdf6020504a7d448bba54f62689e4
			
 
				+2011-04-11T21:00:28.037293Z
			
 
				+188
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+440
			
 
				+
			
--- a/RCCE_V2.0/apps/FLUSH/.svn/text-base/Makefile.svn-base
+++ b/RCCE_V2.0/apps/FLUSH/.svn/text-base/Makefile.svn-base
@@ -0,0 +1,19 @@
 
				+SHELL=sh
			
 
				+RCCEROOT=../..
			
 
				+include $(RCCEROOT)/common/symbols
			
 
				+
			
 
				+CACHEABLEOBJS=RCCE_test_cacheable.o  $(ARCHIVE)
			
 
				+
			
 
				+default:
			
 
				+	@echo "Usage: make test_cacheable "
			
 
				+	@echo "       make clean"
			
 
				+
			
 
				+test_cacheable: $(CACHEABLEOBJS)
			
 
				+	$(CCOMPILE) -o test_cacheable $(CACHEABLEOBJS) $(CFLAGS)
			
 
				+
			
 
				+
			
 
				+RCCE_test_cacheable.o: RCCE_test_cacheable.c $(RCCEINCLUDE)/RCCE.h Makefile
			
 
				+	$(CCOMPILE) -c $(CFLAGS) RCCE_test_cacheable.c  
			
 
				+
			
 
				+clean:
			
 
				+	@ rm -f *.o  test_cacheable 
			
--- a/RCCE_V2.0/apps/FLUSH/.svn/text-base/RCCE_test_cacheable.c.svn-base
+++ b/RCCE_V2.0/apps/FLUSH/.svn/text-base/RCCE_test_cacheable.c.svn-base
@@ -0,0 +1,90 @@
 
				+#include <string.h>
			
 
				+#include <stdio.h>
			
 
				+#include "RCCE.h"
			
 
				+
			
 
				+int RCCE_APP(int argc, char **argv){
			
 
				+  int iam, bufsize=1024*64, size, i, receiver, sender,
			
 
				+    count_receiver, count_sender, count1, count2;
			
 
				+  volatile int *buffer;
			
 
				+
			
 
				+  RCCE_init(&argc, &argv);
			
 
				+
			
 
				+  iam      = RCCE_ue();
			
 
				+  receiver =1;
			
 
				+  sender   =0;
			
 
				+  size   = bufsize*sizeof(int);
			
 
				+  buffer = (int *) RCCE_shmalloc(size);
			
 
				+  count_receiver = count_sender = 0;
			
 
				+
			
 
				+/**********************************************************
			
 
				+The sender initializes its data.
			
 
				+Now this is shared data so value is "seen" by both cores.
			
 
				+The receiver flushes its cache.
			
 
				+***********************************************************/
			
 
				+     if(iam==sender) {
			
 
				+        for(i=0;i<bufsize; i++) { buffer[i]=1; }
			
 
				+     }
			
 
				+
			
 
				+     if(iam==receiver) {RCCE_DCMflush();}
			
 
				+  RCCE_barrier(&RCCE_COMM_WORLD);
			
 
				+
			
 
				+/**********************************************************
			
 
				+The sender reads its data. 
			
 
				+It reads by creating count_sender. 
			
 
				+count_sender (on the sender) is 64K = 65536.
			
 
				+count_sender (on the receiver) is 0.
			
 
				+
			
 
				+The sender modifies its data.
			
 
				+Now these data are in the sender's cache. So the data may not be seen by the receiver.
			
 
				+It might be seen by the receiver. We have no control when data from the cache are evicted.
			
 
				+
			
 
				+The sender flushes its cache 
			
 
				+This guarantees that the receiver sees the data from the sender.
			
 
				+***********************************************************/
			
 
				+     if(iam==sender) {
			
 
				+        for(i=0;i<bufsize; i++) {
			
 
				+           count_sender +=buffer[i];
			
 
				+           buffer[i]++;
			
 
				+        }
			
 
				+        RCCE_DCMflush();
			
 
				+     }
			
 
				+  RCCE_barrier(&RCCE_COMM_WORLD);
			
 
				+
			
 
				+
			
 
				+/**********************************************************
			
 
				+The receiver reads the data.
			
 
				+It should see the data from  the sender.
			
 
				+count_receiver (on the receiver) should be 128K= 131072
			
 
				+count_receiver (on the sender is 0).
			
 
				+***********************************************************/
			
 
				+     if(iam==receiver) {
			
 
				+        for(i=0;i<bufsize; i++) { 
			
 
				+           count_receiver +=buffer[i]; 
			
 
				+        }
			
 
				+     }
			
 
				+  RCCE_barrier(&RCCE_COMM_WORLD);
			
 
				+
			
 
				+/**********************************************************
			
 
				+count1 and count 2 are on both cores.
			
 
				+
			
 
				+count2 contains the number of buffer entries that are 2 (which
			
 
				+should be all of them). So count2 should be 64K.
			
 
				+
			
 
				+count1 should be 0.
			
 
				+
			
 
				+***********************************************************/
			
 
				+
			
 
				+     count1= count2= 0;
			
 
				+     for(i=0;i<bufsize; i++) {
			
 
				+        if(buffer[i]==2) count2++;
			
 
				+        if(buffer[i]==1) count1++;
			
 
				+     }
			
 
				+
			
 
				+     printf("LINE %d: Core %d: count_sender: %d  count_receiver: %d  count1: %d   count2: %d\n",
			
 
				+       __LINE__,iam,count_sender, count_receiver,count1,count2);
			
 
				+
			
 
				+  RCCE_barrier(&RCCE_COMM_WORLD);
			
 
				+     RCCE_shfree((t_vcharp)buffer);
			
 
				+     RCCE_finalize();
			
 
				+     return(0);
			
 
				+}
			
--- a/RCCE_V2.0/apps/FLUSH/Makefile
+++ b/RCCE_V2.0/apps/FLUSH/Makefile
@@ -0,0 +1,19 @@
 
				+# Build rules for test_cacheable (RCCE_test_cacheable.c).
				+# CCOMPILE, CFLAGS, ARCHIVE and RCCEINCLUDE are expected to come from the
				+# included common/symbols file.
				+SHELL=sh
				+RCCEROOT=../..
				+include $(RCCEROOT)/common/symbols
				+
				+CACHEABLEOBJS=RCCE_test_cacheable.o  $(ARCHIVE)
				+
				+# these targets do not name files; keep make from being confused by a file
				+# called "default" or "clean"
				+.PHONY: default clean
				+
				+default:
				+	@echo "Usage: make test_cacheable "
				+	@echo "       make clean"
				+
				+test_cacheable: $(CACHEABLEOBJS)
				+	$(CCOMPILE) -o test_cacheable $(CACHEABLEOBJS) $(CFLAGS)
				+
				+
				+RCCE_test_cacheable.o: RCCE_test_cacheable.c $(RCCEINCLUDE)/RCCE.h Makefile
				+	$(CCOMPILE) -c $(CFLAGS) RCCE_test_cacheable.c  
				+
				+clean:
				+	@ rm -f *.o  test_cacheable 
			
--- a/RCCE_V2.0/apps/FLUSH/RCCE_test_cacheable.c
+++ b/RCCE_V2.0/apps/FLUSH/RCCE_test_cacheable.c
@@ -0,0 +1,90 @@
 
				+#include <string.h>
			
 
				+#include <stdio.h>
			
 
				+#include "RCCE.h"
			
 
				+
			
 
				+int RCCE_APP(int argc, char **argv){
				+  int iam, bufsize=1024*64, size, i, receiver, sender,
				+    count_receiver, count_sender, count1, count2;
				+  volatile int *buffer;
				+  /* NOTE: the report below prints __LINE__; keep the line count unchanged. */
				+  RCCE_init(&argc, &argv);
				+
				+  iam      = RCCE_ue();
				+  receiver =1;
				+  sender   =0;
				+  size   = bufsize*sizeof(int);
				+  buffer = (int *) RCCE_shmalloc(size);
				+  count_receiver = count_sender = 0;
				+
				+/**********************************************************
				+The sender (UE 0) writes 1 into every element of the buffer.
				+The buffer comes from RCCE_shmalloc, so it is shared by both cores.
				+The receiver (UE 1) flushes its cache before the barrier.
				+***********************************************************/
				+     if(iam==sender) {
				+        for(i=0;i<bufsize; i++) { buffer[i]=1; }
				+     }
				+
				+     if(iam==receiver) {RCCE_DCMflush();}
				+  RCCE_barrier(&RCCE_COMM_WORLD);
				+
				+/**********************************************************
				+The sender reads its data back
				+by summing all elements into count_sender.
				+count_sender (on the sender) is 64K = 65536.
				+count_sender (on the receiver) stays 0.
				+
				+The sender then increments every element.
				+These data are now in the sender's cache, so the receiver may not see them yet.
				+It might see them: we have no control over when data are evicted from the cache.
				+
				+The sender flushes its cache.
				+This guarantees that the receiver sees the data from the sender.
				+***********************************************************/
				+     if(iam==sender) {
				+        for(i=0;i<bufsize; i++) {
				+           count_sender +=buffer[i];
				+           buffer[i]++;
				+        }
				+        RCCE_DCMflush();
				+     }
				+  RCCE_barrier(&RCCE_COMM_WORLD);
				+
				+
				+/**********************************************************
				+The receiver sums all elements into count_receiver.
				+It should see the values written by the sender (2 in every element).
				+count_receiver (on the receiver) should be 128K = 131072.
				+count_receiver (on the sender) stays 0.
				+***********************************************************/
				+     if(iam==receiver) {
				+        for(i=0;i<bufsize; i++) { 
				+           count_receiver +=buffer[i]; 
				+        }
				+     }
				+  RCCE_barrier(&RCCE_COMM_WORLD);
				+
				+/**********************************************************
				+count1 and count2 are computed on every core.
				+
				+count2 is the number of buffer entries equal to 2 (which
				+should be all of them), so count2 should be 64K.
				+
				+count1 is the number of entries still equal to 1; it should be 0.
				+
				+***********************************************************/
				+
				+     count1= count2= 0;
				+     for(i=0;i<bufsize; i++) {
				+        if(buffer[i]==2) count2++;
				+        if(buffer[i]==1) count1++;
				+     }
				+
				+     printf("LINE %d: Core %d: count_sender: %d  count_receiver: %d  count1: %d   count2: %d\n",
				+       __LINE__,iam,count_sender, count_receiver,count1,count2);
				+
				+  RCCE_barrier(&RCCE_COMM_WORLD);
				+     RCCE_shfree((t_vcharp)buffer);
				+     RCCE_finalize();
				+     return(0);
				+}
			
--- a/RCCE_V2.0/apps/HELLO/.svn/all-wcprops
+++ b/RCCE_V2.0/apps/HELLO/.svn/all-wcprops
@@ -0,0 +1,17 @@
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 54
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/HELLO
			
 
				+END
			
 
				+Makefile
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 63
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/HELLO/Makefile
			
 
				+END
			
 
				+RCCE_hello.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 67
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/HELLO/RCCE_hello.c
			
 
				+END
			
--- a/RCCE_V2.0/apps/HELLO/.svn/entries
+++ b/RCCE_V2.0/apps/HELLO/.svn/entries
@@ -0,0 +1,96 @@
 
				+10
			
 
				+
			
 
				+dir
			
 
				+313
			
 
				+http://marcbug.scc-dc.com/svn/repository/tags/RCCE_V2.0/apps/HELLO
			
 
				+http://marcbug.scc-dc.com/svn/repository
			
 
				+
			
 
				+
			
 
				+
			
 
				+2011-03-24T16:10:11.693391Z
			
 
				+176
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+c924d837-3317-4ba4-8fbd-5f2da8699d51
			
 
				+
			
 
				+RCCE_hello.c
			
 
				+file
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-10-27T13:42:38.892598Z
			
 
				+3925eebd9980ffb722501bd7395812b2
			
 
				+2011-03-24T16:10:11.693391Z
			
 
				+176
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+991
			
 
				+
			
 
				+Makefile
			
 
				+file
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-10-27T13:42:38.892598Z
			
 
				+419fe8236fac4b64be5bf20eed6d71aa
			
 
				+2010-12-27T18:51:02.240775Z
			
 
				+131
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+347
			
 
				+
			
--- a/RCCE_V2.0/apps/HELLO/.svn/text-base/Makefile.svn-base
+++ b/RCCE_V2.0/apps/HELLO/.svn/text-base/Makefile.svn-base
@@ -0,0 +1,19 @@
 
				+SHELL=sh
			
 
				+RCCEROOT=../..
			
 
				+include $(RCCEROOT)/common/symbols
			
 
				+
			
 
				+HELLOOBJS=RCCE_hello.o  $(ARCHIVE)
			
 
				+
			
 
				+default:
			
 
				+	@echo "Usage: make hello "
			
 
				+	@echo "       make clean"
			
 
				+
			
 
				+hello: $(HELLOOBJS)
			
 
				+	$(CCOMPILE) -o hello $(HELLOOBJS) $(CFLAGS)
			
 
				+
			
 
				+
			
 
				+RCCE_hello.o: RCCE_hello.c $(RCCEINCLUDE)/RCCE.h
			
 
				+	$(CCOMPILE) -c $(CFLAGS) RCCE_hello.c  
			
 
				+
			
 
				+clean:
			
 
				+	@ rm -f *.o  hello 
			
--- a/RCCE_V2.0/apps/HELLO/.svn/text-base/RCCE_hello.c.svn-base
+++ b/RCCE_V2.0/apps/HELLO/.svn/text-base/RCCE_hello.c.svn-base
@@ -0,0 +1,37 @@
 
				+// 

			
 
				+// Copyright 2010 Intel Corporation

			
 
				+// 

			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+//    you may not use this file except in compliance with the License.

			
 
				+//    You may obtain a copy of the License at

			
 
				+// 

			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0

			
 
				+// 

			
 
				+//    Unless required by applicable law or agreed to in writing, software

			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+//    See the License for the specific language governing permissions and

			
 
				+//    limitations under the License.

			
 
				+// 

			
 
				+#include <string.h>

			
 
				+#include <stdio.h>

			
 
				+#include "RCCE.h"

			
 
				+

			
 
				+

			
 
				+int RCCE_APP(int argc, char **argv){

			
 
				+

			
 
				+  RCCE_init(&argc, &argv);

			
 
				+

			
 
				+  //  RCCE_debug_set(RCCE_DEBUG_ALL);

			
 
				+

			
 
				+#ifdef RCCE_VERSION

			
 
				+  printf("Hello from RCCE ... I am %s\n",RCCE_VERSION);

			
 
				+#else

			
 
				+  printf("Hello from RCCE \n");

			
 
				+#endif

			
 
				+

			
 
				+  RCCE_finalize();

			
 
				+

			
 
				+  return(0);

			
 
				+}

			
 
				+

			
--- a/RCCE_V2.0/apps/HELLO/Makefile
+++ b/RCCE_V2.0/apps/HELLO/Makefile
@@ -0,0 +1,19 @@
 
				+SHELL=sh
			
 
				+RCCEROOT=../..
			
 
				+include $(RCCEROOT)/common/symbols
			
 
				+
			
 
				+HELLOOBJS=RCCE_hello.o  $(ARCHIVE)
			
 
				+
			
 
				+default:
			
 
				+	@echo "Usage: make hello "
			
 
				+	@echo "       make clean"
			
 
				+
			
 
				+hello: $(HELLOOBJS)
			
 
				+	$(CCOMPILE) -o hello $(HELLOOBJS) $(CFLAGS)
			
 
				+
			
 
				+
			
 
				+RCCE_hello.o: RCCE_hello.c $(RCCEINCLUDE)/RCCE.h
			
 
				+	$(CCOMPILE) -c $(CFLAGS) RCCE_hello.c  
			
 
				+
			
 
				+clean:
			
 
				+	@ rm -f *.o  hello 
			
--- a/RCCE_V2.0/apps/HELLO/RCCE_hello.c
+++ b/RCCE_V2.0/apps/HELLO/RCCE_hello.c
@@ -0,0 +1,37 @@
 
				+// 

			
 
				+// Copyright 2010 Intel Corporation

			
 
				+// 

			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+//    you may not use this file except in compliance with the License.

			
 
				+//    You may obtain a copy of the License at

			
 
				+// 

			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0

			
 
				+// 

			
 
				+//    Unless required by applicable law or agreed to in writing, software

			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+//    See the License for the specific language governing permissions and

			
 
				+//    limitations under the License.

			
 
				+// 

			
 
				+#include <string.h>

			
 
				+#include <stdio.h>

			
 
				+#include "RCCE.h"

			
 
				+

			
 
				+

			
 
				+int RCCE_APP(int argc, char **argv){

			
 
				+

			
 
				+  RCCE_init(&argc, &argv);

			
 
				+

			
 
				+  //  RCCE_debug_set(RCCE_DEBUG_ALL);

			
 
				+

			
 
				+#ifdef RCCE_VERSION

			
 
				+  printf("Hello from RCCE ... I am %s\n",RCCE_VERSION);

			
 
				+#else

			
 
				+  printf("Hello from RCCE \n");

			
 
				+#endif

			
 
				+

			
 
				+  RCCE_finalize();

			
 
				+

			
 
				+  return(0);

			
 
				+}

			
 
				+

			
--- a/RCCE_V2.0/apps/HELLO/RCCE_hello.o
+++ b/RCCE_V2.0/apps/HELLO/RCCE_hello.o
--- a/RCCE_V2.0/apps/NPB/.svn/all-wcprops
+++ b/RCCE_V2.0/apps/NPB/.svn/all-wcprops
@@ -0,0 +1,11 @@
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 52
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB
			
 
				+END
			
 
				+Makefile
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 61
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/Makefile
			
 
				+END
			
--- a/RCCE_V2.0/apps/NPB/.svn/entries
+++ b/RCCE_V2.0/apps/NPB/.svn/entries
@@ -0,0 +1,77 @@
 
				+10
			
 
				+
			
 
				+dir
			
 
				+313
			
 
				+http://marcbug.scc-dc.com/svn/repository/tags/RCCE_V2.0/apps/NPB
			
 
				+http://marcbug.scc-dc.com/svn/repository
			
 
				+
			
 
				+
			
 
				+
			
 
				+2010-12-27T18:19:08.586526Z
			
 
				+126
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+c924d837-3317-4ba4-8fbd-5f2da8699d51
			
 
				+
			
 
				+LU
			
 
				+dir
			
 
				+
			
 
				+BT
			
 
				+dir
			
 
				+
			
 
				+config
			
 
				+dir
			
 
				+
			
 
				+common
			
 
				+dir
			
 
				+
			
 
				+Makefile
			
 
				+file
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+2012-10-27T13:42:38.860598Z
			
 
				+8b1616489d56e77d35e80c21073c8ac7
			
 
				+2010-06-25T23:28:47.346002Z
			
 
				+7
			
 
				+tekubasx
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+1430
			
 
				+
			
 
				+sys
			
 
				+dir
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/.svn/text-base/Makefile.svn-base
+++ b/RCCE_V2.0/apps/NPB/.svn/text-base/Makefile.svn-base
@@ -0,0 +1,50 @@
 
				+SHELL=sh
			
 
				+CLASS=U
			
 
				+NPROCS=1
			
 
				+
			
 
				+RCCEROOT=../..
			
 
				+include $(RCCEROOT)/common/symbols
			
 
				+
			
 
				+default: header
			
 
				+	@ sys/print_instructions
			
 
				+
			
 
				+bt: clean header
			
 
				+	cd sys; make
			
 
				+	cd BT; $(MAKE) \
			
 
				+               CFLAGS="$(CFLAGS)"           \
			
 
				+               NPROCS=$(NPROCS)             \
			
 
				+               CLASS=$(CLASS)               \
			
 
				+               CCOMPILE=$(CCOMPILE)         \
			
 
				+               RCCEROOT=$(RCCEROOT)         \
			
 
				+               RCCEINCLUDE=$(RCCEINCLUDE)   \
			
 
				+               RCK_LIB_SRC=$(RCK_LIB_SRC)   \
			
 
				+               RCCE_LIB_SRC=$(RCCE_LIB_SRC) \
			
 
				+               ARCHIVE=$(ARCHIVE)
			
 
				+               
			
 
				+lu: clean header
			
 
				+	cd sys; make
			
 
				+	cd LU; $(MAKE) \
			
 
				+               CFLAGS="$(CFLAGS)"           \
			
 
				+               NPROCS=$(NPROCS)             \
			
 
				+               CLASS=$(CLASS)               \
			
 
				+               CCOMPILE=$(CCOMPILE)         \
			
 
				+               RCCEROOT=$(RCCEROOT)         \
			
 
				+               RCCEINCLUDE=$(RCCEINCLUDE)   \
			
 
				+               RCK_LIB_SRC=$(RCK_LIB_SRC)   \
			
 
				+               RCCE_LIB_SRC=$(RCCE_LIB_SRC) \
			
 
				+               ARCHIVE=$(ARCHIVE)
			
 
				+               
			
 
				+
			
 
				+
			
 
				+# It would be nice to make clean in each subdirectory (the targets
			
 
				+# are defined) but on a really clean system this will won't work
			
 
				+# because those makefiles need config/make.def
			
 
				+clean:
			
 
				+	- rm -f core 
			
 
				+	- rm -f *~ */core */*~ */*.o */npbparams.h */*.obj */*.exe
			
 
				+	- rm -f sys/setparams sys/makesuite sys/setparams.h
			
 
				+	- rm -f BT/bt.*.* LU/lu.*.*
			
 
				+
			
 
				+header:
			
 
				+	@ sys/print_header
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/all-wcprops
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/all-wcprops
@@ -0,0 +1,179 @@
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 55
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT
			
 
				+END
			
 
				+header.h
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 64
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/header.h
			
 
				+END
			
 
				+adi.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 61
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/adi.c
			
 
				+END
			
 
				+work_lhs.h
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 66
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/work_lhs.h
			
 
				+END
			
 
				+exact_solution.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 72
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/exact_solution.c
			
 
				+END
			
 
				+initialize.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 68
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/initialize.c
			
 
				+END
			
 
				+timers.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 64
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/timers.c
			
 
				+END
			
 
				+verify.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 64
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/verify.c
			
 
				+END
			
 
				+bt.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 60
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/bt.c
			
 
				+END
			
 
				+setup_mpi.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 67
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/setup_mpi.c
			
 
				+END
			
 
				+applu_share.h
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 69
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/applu_share.h
			
 
				+END
			
 
				+timers.h
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 64
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/timers.h
			
 
				+END
			
 
				+copy_faces.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 68
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/copy_faces.c
			
 
				+END
			
 
				+print_results.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 71
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/print_results.c
			
 
				+END
			
 
				+x_solve.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 65
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/x_solve.c
			
 
				+END
			
 
				+exact_rhs.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 67
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/exact_rhs.c
			
 
				+END
			
 
				+y_solve.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 65
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/y_solve.c
			
 
				+END
			
 
				+z_solve.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 65
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/z_solve.c
			
 
				+END
			
 
				+solve_subs.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 68
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/solve_subs.c
			
 
				+END
			
 
				+set_constants.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 71
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/set_constants.c
			
 
				+END
			
 
				+make_set.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 66
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/make_set.c
			
 
				+END
			
 
				+add.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 61
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/add.c
			
 
				+END
			
 
				+error.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 63
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/error.c
			
 
				+END
			
 
				+define.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 64
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/define.c
			
 
				+END
			
 
				+applu_macros.h
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 70
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/applu_macros.h
			
 
				+END
			
 
				+mpinpb.h
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 64
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/mpinpb.h
			
 
				+END
			
 
				+inputbt.data.sample
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 75
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/inputbt.data.sample
			
 
				+END
			
 
				+applu_protos.h
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 70
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/applu_protos.h
			
 
				+END
			
 
				+Makefile
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 64
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/Makefile
			
 
				+END
			
 
				+rhs.c
			
 
				+K 25
			
 
				+svn:wc:ra_dav:version-url
			
 
				+V 61
			
 
				+/svn/repository/!svn/ver/297/tags/RCCE_V2.0/apps/NPB/BT/rhs.c
			
 
				+END
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/entries
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/entries
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/Makefile.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/Makefile.svn-base
@@ -0,0 +1,65 @@
 
				+SHELL=/bin/sh
			
 
				+BENCHMARK=bt
			
 
				+BENCHMARKU=BT
			
 
				+
			
 
				+PROGRAM  = $(BENCHMARK).$(CLASS).$(NPROCS)
			
 
				+
			
 
				+default:: ${PROGRAM}
			
 
				+
			
 
				+# This makes sure the configuration utility setparams 
			
 
				+# is up to date. 
			
 
				+# Note that this must be run every time, which is why the
			
 
				+# target does not exist and is not created. 
			
 
				+# If you create a file called "config" you will break things. 
			
 
				+config:
			
 
				+	cd ../sys; ${MAKE} all
			
 
				+	../sys/setparams ${BENCHMARK} ${NPROCS} ${CLASS}
			
 
				+
			
 
				+# Normally setparams updates npbparams.h only if the settings (CLASS/NPROCS)
			
 
				+# have changed. However, we also want to update if the compile options
			
 
				+# may have changed (set in ../config/make.def). 
			
 
				+npbparams.h: ../config/make.def
			
 
				+	@ echo make.def modified. Rebuilding npbparams.h just in case
			
 
				+	rm -f npbparams.h
			
 
				+	../sys/setparams ${BENCHMARK} ${NPROCS} ${CLASS}
			
 
				+
			
 
				+# So that "make benchmark-name" works
			
 
				+${BENCHMARK}:  default
			
 
				+${BENCHMARKU}: default
			
 
				+
			
 
				+bt.o:             bt.c  header.h npbparams.h  mpinpb.h
			
 
				+make_set.o:       make_set.c  header.h npbparams.h  mpinpb.h
			
 
				+initialize.o:     initialize.c  header.h npbparams.h
			
 
				+exact_solution.o: exact_solution.c  header.h npbparams.h
			
 
				+exact_rhs.o:      exact_rhs.c  header.h npbparams.h
			
 
				+set_constants.o:  set_constants.c  header.h npbparams.h
			
 
				+adi.o:            adi.c  header.h npbparams.h
			
 
				+define.o:         define.c  header.h npbparams.h
			
 
				+copy_faces.o:     copy_faces.c  header.h npbparams.h  mpinpb.h
			
 
				+rhs.o:            rhs.c  header.h npbparams.h
			
 
				+x_solve.o:        x_solve.c  header.h work_lhs.h npbparams.h  mpinpb.h
			
 
				+y_solve.o:        y_solve.c  header.h work_lhs.h npbparams.h  mpinpb.h
			
 
				+z_solve.o:        z_solve.c  header.h work_lhs.h npbparams.h  mpinpb.h
			
 
				+solve_subs.o:     solve_subs.c  npbparams.h
			
 
				+add.o:            add.c  header.h npbparams.h
			
 
				+error.o:          error.c  header.h npbparams.h  mpinpb.h
			
 
				+verify.o:         verify.c  header.h npbparams.h  mpinpb.h
			
 
				+setup_mpi.o:      setup_mpi.c mpinpb.h npbparams.h 
			
 
				+
			
 
				+
			
 
				+OBJS = bt.o make_set.o initialize.o exact_solution.o \
			
 
				+       exact_rhs.o set_constants.o adi.o define.o copy_faces.o  \
			
 
				+       rhs.o x_solve.o y_solve.o z_solve.o add.o solve_subs.o   \
			
 
				+       error.o verify.o setup_mpi.o print_results.o timers.o $(ARCHIVE) 
			
 
				+
			
 
				+$(PROGRAM): ${OBJS} 
			
 
				+	${CCOMPILE} ${CFLAGS} -o ${PROGRAM} ${OBJS} 
			
 
				+# use line below for gcc, which does not link libm by default
			
 
				+#	${CCOMPILE} ${CFLAGS} -o ${PROGRAM} ${OBJS} -lm
			
 
				+
			
 
				+.c.o:
			
 
				+	${CCOMPILE} -c $(CFLAGS)  $<
			
 
				+
			
 
				+clean:
			
 
				+	- rm -f *.o *~ mputil*
			
 
				+	- rm -f  npbparams.h core
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/add.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/add.c.svn-base
@@ -0,0 +1,44 @@
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+#include "header.h"
			
 
				+
			
 
				+void  add() {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     addition of update to the vector u
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int  c, i, j, k, m;
			
 
				+
			
 
				+      for (c = 1; c <= ncells; c++) {
			
 
				+         for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {
			
 
				+            for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     u(m,i,j,k,c) = u(m,i,j,k,c) + rhs(m,i,j,k,c);
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/adi.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/adi.c.svn-base
@@ -0,0 +1,34 @@
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+#include "header.h"
			
 
				+#include "RCCE.h"
			
 
				+
			
 
				+void  adi() {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      copy_faces();
			
 
				+      x_solve();
			
 
				+      y_solve();
			
 
				+      z_solve();
			
 
				+      add();
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/applu_macros.h.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/applu_macros.h.svn-base
@@ -0,0 +1,8 @@
 
				+/* PAD32byte is used to compute a cacheline padded length of n (input) bytes */
			
 
				+#define  PAD32byte(n) ((n)%32==0 ? (n) : (n) + 32 - (n)%32)
			
 
				+/* PAD32dbl is used to compute a cacheline padded length of n (input) doubles */
			
 
				+#define  PAD32dbl(n)  ((n)%(32/sizeof(double))==0 ? (n) : (n) + (32/sizeof(double)) \
			
 
				+                      - (n)%(32/sizeof(double)))
			
 
				+
			
 
				+#define max(x,y)      ((x)>(y)? (x) : (y))
			
 
				+#define min(x,y)      ((x)<(y)? (x) : (y))
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/applu_protos.h.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/applu_protos.h.svn-base
@@ -0,0 +1,38 @@
 
				+void blts(int);
			
 
				+void buts(int, double *);
			
 
				+void erhs();
			
 
				+void error();
			
 
				+void exact(int, int, int, double *);
			
 
				+void exchange_1(double *, int, int);
			
 
				+void exchange_3(double *, int);
			
 
				+void exchange_4(double *, double *, int, int, int, int);
			
 
				+void exchange_5(double *, int, int);
			
 
				+void exchange_6(double *, int, int);
			
 
				+void RCCE_allreduce_d(double *, double *, int, int);
			
 
				+void init_comm(int *, char ***);
			
 
				+void jacld(int);
			
 
				+void jacu(int);
			
 
				+void l2norm(int, int, int, double *, double *);
			
 
				+void neighbors();
			
 
				+void pintgr();
			
 
				+void print_results(char *, char *, int *,  int *, int *, int *,
			
 
				+                    int *, int *, double *, double *, char *,
			
 
				+                    int *, char *, char *, char *, char *, char *,
			
 
				+                    char *, char *, char *, char *);
			
 
				+void proc_grid();
			
 
				+void bcast_inputs();
			
 
				+void read_input();
			
 
				+void rhs();
			
 
				+void setbv();
			
 
				+void setcoeff();
			
 
				+void setiv();
			
 
				+void ssor(int);
			
 
				+void subdomain();
			
 
				+void timer_clear(int *);
			
 
				+void timer_start(int *);
			
 
				+void timer_stop(int *);
			
 
				+void verify(double *, double *, double *, char *);
			
 
				+int  nodedim();
			
 
				+double timer_read(int *);
			
 
				+double test_rsd();
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/applu_share.h.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/applu_share.h.svn-base
@@ -0,0 +1,60 @@
 
				+#include "npbparams.h"
			
 
				+#include "applu_protos.h"
			
 
				+#include "RCCE.h"
			
 
				+
			
 
				+extern double u[5*(isiz1+4)*(isiz2+4)*isiz3],
			
 
				+              rsd[5*(isiz1+4)*(isiz2+4)*isiz3],
			
 
				+              frct[5*(isiz1+4)*(isiz2+4)*isiz3],
			
 
				+              flux[5*(isiz1+2)*(isiz2+2)*isiz3];
			
 
				+extern double a[5*5*isiz1*isiz2],
			
 
				+              b[5*5*isiz1*isiz2],
			
 
				+              c[5*5*isiz1*isiz2],
			
 
				+              d[5*5*isiz1*isiz2];
			
 
				+
			
 
				+extern double dt, omega, tolrsd[5], rsdnm[5], errnm[5], frc, ttotal;
			
 
				+extern double tolrsd1_def, tolrsd2_def, tolrsd3_def, tolrsd4_def, tolrsd5_def,
			
 
				+              omega_default;
			
 
				+extern double ce[5*13];
			
 
				+
			
 
				+extern int ndim, id, num, xdim, ydim, row, col;
			
 
				+extern int ii1, ii2, ji1, ji2, ki1, ki2;
			
 
				+extern int itmax, invert; 
			
 
				+extern int ipr, ipr_default, inorm;
			
 
				+extern int north,south,east,west;
			
 
				+extern int nx0, ny0, nz0;
			
 
				+extern int nx, ny, nz;
			
 
				+extern int ist, iend, jst, jend, ipt, jpt;
			
 
				+extern int dp_type;
			
 
				+extern double tx1, ty1, tz1, 
			
 
				+              dx1, dy1, dz1, 
			
 
				+              tx2, ty2, tz2, 
			
 
				+              dx2, dy2, dz2, 
			
 
				+              tx3, ty3, tz3, 
			
 
				+              dx3, dy3, dz3, 
			
 
				+              dx4, dy4, dz4, 
			
 
				+              dx5, dy5, dz5, 
			
 
				+              dssp, c1,  c2,  
			
 
				+              c3,  c4,  c5;
			
 
				+extern double dxi, deta, dzeta;
			
 
				+extern double npmax, maxtime;
			
 
				+extern double *buf1_exch_1;
			
 
				+
			
 
				+#ifdef _OPENMP
			
 
				+#pragma omp threadprivate (nx, ny, nz, nx0, ny0, nz0, \
			
 
				+                     ipt, ist, iend, jpt, jst, jend, \
			
 
				+                     ii1, ii2, ji1, ji2, ki1, ki2, \
			
 
				+                     dxi, deta, dzeta, \
			
 
				+                     tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3)
			
 
				+#pragma omp threadprivate (dx1, dx2, dx3, dx4, dx5, \
			
 
				+                     dy1, dy2, dy3, dy4, dy5, \
			
 
				+                     dz1, dz2, dz3, dz4, dz5, \
			
 
				+                     dssp)
			
 
				+#pragma omp threadprivate(u, rsd, frct, flux)
			
 
				+#pragma omp threadprivate(ipr, inorm)
			
 
				+#pragma omp threadprivate(itmax, invert, \
			
 
				+                    dt, omega, tolrsd, rsdnm, errnm, frc, ttotal, \
			
 
				+                    a, b, c, d)
			
 
				+#pragma omp threadprivate(ce)
			
 
				+#pragma omp threadprivate (id, ndim, num, xdim, ydim, row, col, \
			
 
				+                     north,south,east,west, buf1_exch_1, npmax, maxtime)
			
 
				+#endif
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/bt.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/bt.c.svn-base
@@ -0,0 +1,216 @@
 
				+//-------------------------------------------------------------------------!
			
 
				+//                                                                         !
			
 
				+//        N  A  S     P A R A L L E L     B E N C H M A R K S  3.3         !
			
 
				+//                                                                         !
			
 
				+//                                   B T                                   !
			
 
				+//                                                                         !
			
 
				+//-------------------------------------------------------------------------!
			
 
				+//                                                                         !
			
 
				+//    This benchmark is part of the NAS Parallel Benchmark 3.3 suite.      !
			
 
				+//    It is described in NAS Technical Reports 95-020 and 02-007.          !
			
 
				+//                                                                         !
			
 
				+//    Permission to use, copy, distribute and modify this software         !
			
 
				+//    for any purpose with or without fee is hereby granted.  We           !
			
 
				+//    request, however, that all derived work reference the NAS            !
			
 
				+//    Parallel Benchmarks 3.3. This software is provided "as is"           !
			
 
				+//    without express or implied warranty.                                 !
			
 
				+//                                                                         !
			
 
				+//    Information on NPB 3.3, including the technical report, the          !
			
 
				+//    original specifications, source code, results and information        !
			
 
				+//    on how to submit new results, is available at:                       !
			
 
				+//                                                                         !
			
 
				+//           http://www.nas.nasa.gov/Software/NPB/                         !
			
 
				+//                                                                         !
			
 
				+//    Send comments or suggestions to  npb@nas.nasa.gov                    !
			
 
				+//                                                                         !
			
 
				+//          NAS Parallel Benchmarks Group                                  !
			
 
				+//          NASA Ames Research Center                                      !
			
 
				+//          Mail Stop: T27A-1                                              !
			
 
				+//          Moffett Field, CA   94035-1000                                 !
			
 
				+//                                                                         !
			
 
				+//          E-mail:  npb@nas.nasa.gov                                      !
			
 
				+//          Fax:     (650) 604-3957                                        !
			
 
				+//                                                                         !
			
 
				+//-------------------------------------------------------------------------!
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+//
			
 
				+// Authors: R. F. Van der Wijngaart
			
 
				+//          T. Harris
			
 
				+//          M. Yarrow
			
 
				+//
			
 
				+//---------------------------------------------------------------------
			
 
				+#include <stdio.h>
			
 
				+#include <string.h>
			
 
				+#include "RCCE.h"
			
 
				+#include "applu_macros.h"
			
 
				+#define G_MAIN
			
 
				+#include "header.h"
			
 
				+#include "mpinpb.h"
			
 
				+
			
 
				+#define BSIZE 132
			
 
				+void make_color(void);
			
 
				+void print_results(char*, char, int, int, int, int, int, int, double,
			
 
				+                   double, char*, int, char*, char*, char*, char*, 
			
 
				+                   char*, char*, char*, char*);
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//      program MPBT;
			
 
				+//---------------------------------------------------------------------
			
 
				+int RCCE_APP(int argc, char **argv) {
			
 
				+
			
 
				+       int N = 1000, nothing;
			
 
				+       int i, niter, step, c, error, fstatus;
			
 
				+       double navg, mflops, mbytes, n3;
			
 
				+       RCCE_COMM aux[N];
			
 
				+
			
 
				+       double t, tmax, tiominv, tpc;
			
 
				+       int verified;
			
 
				+       char class;
			
 
				+       size_t chunk;
			
 
				+
			
 
				+       char cbuf[BSIZE];
			
 
				+
			
 
				+       if (setup_mpi(&argc, &argv)) {
			
 
				+       RCCE_finalize();
			
 
				+       return 0;
			
 
				+       }
			
 
				+
			
 
				+//       RCCE_debug_set(RCCE_DEBUG_ALL);
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//      Root node reads input file (if it exists) else takes
			
 
				+//      defaults from parameters
			
 
				+//---------------------------------------------------------------------
			
 
				+       if (node == root) {
			
 
				+          
			
 
				+          printf("\n\n NAS Parallel Benchmarks 3.3 -- BT Benchmark\n");
			
 
				+
			
 
				+       }
			
 
				+          niter = NITER_DEFAULT;
			
 
				+          dt    = dt_default;
			
 
				+          grid_points(1) = PROBLEM_SIZE;
			
 
				+          grid_points(2) = PROBLEM_SIZE;
			
 
				+          grid_points(3) = PROBLEM_SIZE;
			
 
				+
			
 
				+       if (node == root) {
			
 
				+          printf(" Size: %4dx%4dx%4d\n", 
			
 
				+                 grid_points(1), grid_points(2), grid_points(3));
			
 
				+          printf(" Iterations: %4d    dt: %11.7f\n", niter, dt);
			
 
				+          if (no_nodes != total_nodes)
			
 
				+              printf(" Total number of processes: %5d\n", total_nodes);
			
 
				+          if (no_nodes != MAXCELLS*MAXCELLS) 
			
 
				+              printf(" WARNING: compiled for %5d processes\n",
			
 
				+                     MAXCELLS*MAXCELLS);
			
 
				+          printf(" Number of active processes: %5d\n\n", no_nodes);
			
 
				+
			
 
				+       }
			
 
				+
			
 
				+       make_set();
			
 
				+       make_color();
			
 
				+
			
 
				+
			
 
				+       for (c = 1; c <= MAXCELLS; c++) {
			
 
				+          if ( (cell_size(1,c) > IMAX) ||
			
 
				+               (cell_size(2,c) > JMAX) ||
			
 
				+               (cell_size(3,c) > KMAX) ) {
			
 
				+             printf(" %d %d %d %d %d\n", node, c, cell_size(1,c),
			
 
				+                     cell_size(2,c), cell_size(3,c));
			
 
				+             printf(" Problem size too big for compiled array sizes\n");
			
 
				+          }
			
 
				+       }
			
 
				+
			
 
				+       set_constants();
			
 
				+
			
 
				+       initialize();
			
 
				+
			
 
				+       lhsinit();
			
 
				+
			
 
				+       exact_rhs();
			
 
				+
			
 
				+       compute_buffer_size(5);
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//      do one time step to touch all code, and reinitialize
			
 
				+//---------------------------------------------------------------------
			
 
				+       adi();
			
 
				+
			
 
				+       initialize();
			
 
				+
			
 
				+       timer_clear(2);
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//      Synchronize before placing time stamp
			
 
				+//---------------------------------------------------------------------
			
 
				+       RCCE_barrier(&RCCE_COMM_WORLD);
			
 
				+
			
 
				+       timer_clear(1);
			
 
				+       timer_start(1);
			
 
				+
			
 
				+       for (step = 1; step <= niter; step++) {
			
 
				+
			
 
				+          if (node == root) {
			
 
				+             if ((step%20) == 0 || step == niter ||
			
 
				+                 step == 1) {
			
 
				+		 printf(" Time step %4d\n", step); fflush(stdout);
			
 
				+             }
			
 
				+          }
			
 
				+          adi();
			
 
				+       }
			
 
				+
			
 
				+       timer_stop(1);
			
 
				+       t = timer_read(1);
			
 
				+       
			
 
				+       verify(niter, &class, &verified);
			
 
				+
			
 
				+       RCCE_reduce((char*)(&t), (char*)(&tmax), 1, RCCE_DOUBLE, RCCE_MAX, root, RCCE_COMM_WORLD);
			
 
				+
			
 
				+       if( node == root ) {
			
 
				+          n3 = 1.0e0*grid_points(1)*grid_points(2)*grid_points(3);
			
 
				+          navg = (grid_points(1)+grid_points(2)+grid_points(3))/3.0;
			
 
				+          if( tmax != 0. ) {
			
 
				+             mflops = 1.0e-6*(double)(niter)*
			
 
				+               (3478.8*(double)n3-17655.7*navg*navg+28023.7*navg)
			
 
				+               / tmax;
			
 
				+          } else {
			
 
				+             mflops = 0.0;
			
 
				+          }
			
 
				+
			
 
				+         print_results("BT", class, grid_points[0], 
			
 
				+           grid_points[1], grid_points[2], niter, MAXCELLS*MAXCELLS, 
			
 
				+           total_nodes, tmax, mflops, "          floating point", 
			
 
				+           verified, NPBVERSION,COMPILETIME, CS1, CS2, CS3, CS4, CS5, 
			
 
				+           CS6);
			
 
				+
			
 
				+
			
 
				+//         FILE *perf_file;
			
 
				+//         char name[50] = "/shared/DEMOS/RCCE/NPB_BT/perf."; 
			
 
				+//         char postfix[50]; 
			
 
				+//         sprintf(postfix, "%d", total_nodes); 
			
 
				+//         strcat(name, postfix); 
			
 
				+//         perf_file = fopen(name,"w"); 
			
 
				+//         fprintf(perf_file, "%d", (int)mflops); 
			
 
				+//         fclose(perf_file); 
			
 
				+       }
			
 
				+
			
 
				+
			
 
				+       RCCE_finalize();
			
 
				+
			
 
				+       return 0;
			
 
				+
			
 
				+}
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/copy_faces.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/copy_faces.c.svn-base
@@ -0,0 +1,338 @@
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include "header.h"
			
 
				+#include "mpinpb.h"
			
 
				+
			
 
				+void copy_faces() {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     
			
 
				+// This function copies the face values of a variable defined on a set 
			
 
				+// of cells to the overlap locations of the adjacent sets of cells. 
			
 
				+// Because a set of cells interfaces in each direction with exactly one 
			
 
				+// other set, we only need to fill six different buffers. We could try to
			
 
				+// overlap communication with computation, by computing
			
 
				+// some internal values while communicating boundary values, but this
			
 
				+// adds so much overhead that it's not clearly useful. 
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int i, j, k, c, m, p0, p1, phase,
			
 
				+           p2, p3, p4, p5, b_size[6], ss[6], 
			
 
				+           sr[6], error;
			
 
				+
			
 
				+#define b_size(m) b_size[m]
			
 
				+#define ss(m) ss[m]
			
 
				+#define sr(m) sr[m]
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     exit immediately if there are no faces to be copied           
			
 
				+//---------------------------------------------------------------------
			
 
				+      if (no_nodes == 1) {
			
 
				+         compute_rhs();
			
 
				+         return;
			
 
				+      }
			
 
				+
			
 
				+      ss(0) = start_send_east;
			
 
				+      ss(1) = start_send_west;
			
 
				+      ss(2) = start_send_north;
			
 
				+      ss(3) = start_send_south;
			
 
				+      ss(4) = start_send_top;
			
 
				+      ss(5) = start_send_bottom;
			
 
				+
			
 
				+      sr(0) = start_recv_east;
			
 
				+      sr(1) = start_recv_west;
			
 
				+      sr(2) = start_recv_north;
			
 
				+      sr(3) = start_recv_south;
			
 
				+      sr(4) = start_recv_top;
			
 
				+      sr(5) = start_recv_bottom;
			
 
				+
			
 
				+      b_size(0) = east_size   ;
			
 
				+      b_size(1) = west_size   ;
			
 
				+      b_size(2) = north_size  ;
			
 
				+      b_size(3) = south_size  ;
			
 
				+      b_size(4) = top_size    ;
			
 
				+      b_size(5) = bottom_size ;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     because the difference stencil for the diagonalized scheme is 
			
 
				+//     orthogonal, we do not have to perform the staged copying of faces,
			
 
				+//     but can send all face information simultaneously to the neighboring
			
 
				+//     cells in all directions          
			
 
				+//---------------------------------------------------------------------
			
 
				+      p0 = 0;
			
 
				+      p1 = 0;
			
 
				+      p2 = 0;
			
 
				+      p3 = 0;
			
 
				+      p4 = 0;
			
 
				+      p5 = 0;
			
 
				+
			
 
				+      for (c = 1; c <= ncells; c++) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     fill the buffer to be sent to eastern neighbors (i-dir)
			
 
				+//---------------------------------------------------------------------
			
 
				+         if (cell_coord(1,c) != ncells) {
			
 
				+            for (k = 0; k <= cell_size(3,c)-1; k++) {
			
 
				+               for (j = 0; j <= cell_size(2,c)-1; j++) {
			
 
				+                  for (i = cell_size(1,c)-2; i <= cell_size(1,c)-1; i++) {
			
 
				+                     for (m = 1; m <= 5; m++) {
			
 
				+                        out_buffer(ss(0)+p0) = u(m,i,j,k,c);
			
 
				+                        p0 = p0 + 1;
			
 
				+                     }
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     fill the buffer to be sent to western neighbors 
			
 
				+//---------------------------------------------------------------------
			
 
				+         if (cell_coord(1,c) != 1) {
			
 
				+            for (k = 0; k <= cell_size(3,c)-1; k++) {
			
 
				+               for (j = 0; j <= cell_size(2,c)-1; j++) {
			
 
				+                  for (i = 0; i <= 1; i++) {
			
 
				+                     for (m = 1; m <= 5; m++) {
			
 
				+                        out_buffer(ss(1)+p1) = u(m,i,j,k,c);
			
 
				+                        p1 = p1 + 1;
			
 
				+                     }
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+
			
 
				+         }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     fill the buffer to be sent to northern neighbors (j_dir)
			
 
				+//---------------------------------------------------------------------
			
 
				+         if (cell_coord(2,c) != ncells) {
			
 
				+            for (k = 0; k <= cell_size(3,c)-1; k++) {
			
 
				+               for (j = cell_size(2,c)-2; j <= cell_size(2,c)-1; j++) {
			
 
				+                  for (i = 0; i <= cell_size(1,c)-1; i++) {
			
 
				+                     for (m = 1; m <= 5; m++) {
			
 
				+                        out_buffer(ss(2)+p2) = u(m,i,j,k,c);
			
 
				+                        p2 = p2 + 1;
			
 
				+                     }
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     fill the buffer to be sent to southern neighbors 
			
 
				+//---------------------------------------------------------------------
			
 
				+         if (cell_coord(2,c)!= 1) {
			
 
				+            for (k = 0; k <= cell_size(3,c)-1; k++) {
			
 
				+               for (j = 0; j <= 1; j++) {
			
 
				+                  for (i = 0; i <= cell_size(1,c)-1; i++) {
			
 
				+                     for (m = 1; m <= 5; m++) {
			
 
				+                        out_buffer(ss(3)+p3) = u(m,i,j,k,c);
			
 
				+                        p3 = p3 + 1;
			
 
				+                     }
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     fill the buffer to be sent to top neighbors (k-dir)
			
 
				+//---------------------------------------------------------------------
			
 
				+         if (cell_coord(3,c) != ncells) {
			
 
				+            for (k = cell_size(3,c)-2; k <= cell_size(3,c)-1; k++) {
			
 
				+               for (j = 0; j <= cell_size(2,c)-1; j++) {
			
 
				+                  for (i = 0; i <= cell_size(1,c)-1; i++) {
			
 
				+                     for (m = 1; m <= 5; m++) {
			
 
				+                        out_buffer(ss(4)+p4) = u(m,i,j,k,c);
			
 
				+                        p4 = p4 + 1;
			
 
				+                     }
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     fill the buffer to be sent to bottom neighbors
			
 
				+//---------------------------------------------------------------------
			
 
				+         if (cell_coord(3,c)!= 1) {
			
 
				+            for (k = 0; k <= 1; k++) {
			
 
				+               for (j = 0; j <= cell_size(2,c)-1; j++) {
			
 
				+                  for (i = 0; i <= cell_size(1,c)-1; i++) {
			
 
				+                     for (m = 1; m <= 5; m++) {
			
 
				+                        out_buffer(ss(5)+p5) = u(m,i,j,k,c);
			
 
				+                        p5 = p5 + 1;
			
 
				+                     }
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     cell loop
			
 
				+//---------------------------------------------------------------------
			
 
				+      }
			
 
				+
			
 
				+      for (phase = 0; phase < 3; phase++) {
			
 
				+
			
 
				+      if (send_color[WESTDIR]==phase)  {
			
 
				+        RCCE_send((char*)(&out_buffer(ss(1))), b_size(1)*sizeof(double), predecessor(1));
			
 
				+      }
			
 
				+      if (recv_color[WESTDIR]==phase)  {
			
 
				+        RCCE_recv((char*)(&in_buffer(sr(0))),  b_size(0)*sizeof(double), successor(1));
			
 
				+      }
			
 
				+
			
 
				+      if (send_color[EASTDIR]==phase)  {
			
 
				+        RCCE_send((char*)(&out_buffer(ss(0))), b_size(0)*sizeof(double), successor(1));
			
 
				+      }
			
 
				+      if (recv_color[EASTDIR]==phase)  {
			
 
				+        RCCE_recv((char*)(&in_buffer(sr(1))),  b_size(1)*sizeof(double), predecessor(1));
			
 
				+      }
			
 
				+
			
 
				+      if (send_color[SOUTHDIR]==phase)  {
			
 
				+        RCCE_send((char*)(&out_buffer(ss(3))), b_size(3)*sizeof(double), predecessor(2));
			
 
				+      }
			
 
				+      if (recv_color[SOUTHDIR]==phase)  {
			
 
				+        RCCE_recv((char*)(&in_buffer(sr(2))),  b_size(2)*sizeof(double), successor(2));
			
 
				+      }
			
 
				+
			
 
				+      if (send_color[NORTHDIR]==phase)  {
			
 
				+        RCCE_send((char*)(&out_buffer(ss(2))), b_size(2)*sizeof(double),successor(2));
			
 
				+      }
			
 
				+      if (recv_color[NORTHDIR]==phase)  {
			
 
				+        RCCE_recv((char*)(&in_buffer(sr(3))),  b_size(3)*sizeof(double), predecessor(2));
			
 
				+      }
			
 
				+
			
 
				+      if (send_color[BOTTOMDIR]==phase)  {
			
 
				+        RCCE_send((char*)(&out_buffer(ss(5))), b_size(5)*sizeof(double),predecessor(3));
			
 
				+      }
			
 
				+      if (recv_color[BOTTOMDIR]==phase)  {
			
 
				+        RCCE_recv((char*)(&in_buffer(sr(4))),  b_size(4)*sizeof(double), successor(3));
			
 
				+      }
			
 
				+
			
 
				+      if (send_color[TOPDIR]==phase)  {
			
 
				+        RCCE_send((char*)(&out_buffer(ss(4))), b_size(4)*sizeof(double),successor(3));
			
 
				+      }
			
 
				+      if (recv_color[TOPDIR]==phase)  {
			
 
				+        RCCE_recv((char*)(&in_buffer(sr(5))),  b_size(5)*sizeof(double), predecessor(3));
			
 
				+      }
			
 
				+   }      
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     unpack the data that has just been received;             
			
 
				+//---------------------------------------------------------------------
			
 
				+      p0 = 0;
			
 
				+      p1 = 0;
			
 
				+      p2 = 0;
			
 
				+      p3 = 0;
			
 
				+      p4 = 0;
			
 
				+      p5 = 0;
			
 
				+
			
 
				+      for (c = 1; c <= ncells; c++) {
			
 
				+
			
 
				+         if (cell_coord(1,c) != 1) {
			
 
				+            for (k = 0; k <= cell_size(3,c)-1; k++) {
			
 
				+               for (j = 0; j <= cell_size(2,c)-1; j++) {
			
 
				+                  for (i = -2; i <= -1; i++) {
			
 
				+                     for (m = 1; m <= 5; m++) {
			
 
				+                        u(m,i,j,k,c) = in_buffer(sr(1)+p0);
			
 
				+                        p0 = p0 + 1;
			
 
				+                     }
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+         if (cell_coord(1,c) != ncells) {
			
 
				+            for (k = 0; k <= cell_size(3,c)-1; k++) {
			
 
				+               for (j = 0; j <= cell_size(2,c)-1; j++) {
			
 
				+                  for (i = cell_size(1,c); i <= cell_size(1,c)+1; i++) {
			
 
				+                     for (m = 1; m <= 5; m++) {
			
 
				+                        u(m,i,j,k,c) = in_buffer(sr(0)+p1);
			
 
				+                        p1 = p1 + 1;
			
 
				+                     }
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+            
			
 
				+         if (cell_coord(2,c) != 1) {
			
 
				+            for (k = 0; k <= cell_size(3,c)-1; k++) {
			
 
				+               for (j = -2; j <= -1; j++) {
			
 
				+                  for (i = 0; i <= cell_size(1,c)-1; i++) {
			
 
				+                     for (m = 1; m <= 5; m++) {
			
 
				+                        u(m,i,j,k,c) = in_buffer(sr(3)+p2);
			
 
				+                        p2 = p2 + 1;
			
 
				+                     }
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+
			
 
				+         }
			
 
				+            
			
 
				+         if (cell_coord(2,c) != ncells) {
			
 
				+            for (k = 0; k <= cell_size(3,c)-1; k++) {
			
 
				+               for (j = cell_size(2,c); j <= cell_size(2,c)+1; j++) {
			
 
				+                  for (i = 0; i <= cell_size(1,c)-1; i++) {
			
 
				+                     for (m = 1; m <= 5; m++) {
			
 
				+                        u(m,i,j,k,c) = in_buffer(sr(2)+p3);
			
 
				+                        p3 = p3 + 1;
			
 
				+                     }
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+         if (cell_coord(3,c) != 1) {
			
 
				+            for (k = -2; k <= -1; k++) {
			
 
				+               for (j = 0; j <= cell_size(2,c)-1; j++) {
			
 
				+                  for (i = 0; i <= cell_size(1,c)-1; i++) {
			
 
				+                     for (m = 1; m <= 5; m++) {
			
 
				+                        u(m,i,j,k,c) = in_buffer(sr(5)+p4);
			
 
				+                        p4 = p4 + 1;
			
 
				+                     }
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+         if (cell_coord(3,c) != ncells) {
			
 
				+            for (k = cell_size(3,c); k <= cell_size(3,c)+1; k++) {
			
 
				+               for (j = 0; j <= cell_size(2,c)-1; j++) {
			
 
				+                  for (i = 0; i <= cell_size(1,c)-1; i++) {
			
 
				+                     for (m = 1; m <= 5; m++) {
			
 
				+                        u(m,i,j,k,c) = in_buffer(sr(4)+p5);
			
 
				+                        p5 = p5 + 1;
			
 
				+                     }
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     cells loop
			
 
				+//---------------------------------------------------------------------
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     do the rest of the rhs that uses the copied face values          
			
 
				+//---------------------------------------------------------------------
			
 
				+      compute_rhs();
			
 
				+
			
 
				+      return;
			
 
				+}
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/define.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/define.c.svn-base
@@ -0,0 +1,78 @@
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include "header.h"
			
 
				+
			
 
				+void compute_buffer_size(int dim) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int  c, face_size;
			
 
				+
			
 
				+      if (ncells == 1) return;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     compute the actual sizes of the buffers; note that there is 
			
 
				+//     always one cell face that doesn't need buffer space, because it 
			
 
				+//     is at the boundary of the grid
			
 
				+//---------------------------------------------------------------------
			
 
				+      west_size = 0;
			
 
				+      east_size = 0;
			
 
				+
			
 
				+      for (c = 1; c <= ncells; c++) {
			
 
				+         face_size = cell_size(2,c) * cell_size(3,c) * dim * 2;
			
 
				+         if (cell_coord(1,c)!=1) west_size = west_size + face_size;
			
 
				+         if (cell_coord(1,c)!=ncells) east_size = east_size + 
			
 
				+              face_size ;
			
 
				+      }
			
 
				+
			
 
				+      north_size = 0;
			
 
				+      south_size = 0;
			
 
				+      for (c = 1; c <= ncells; c++) {
			
 
				+         face_size = cell_size(1,c)*cell_size(3,c) * dim * 2;
			
 
				+         if (cell_coord(2,c)!=1) south_size = south_size + face_size;
			
 
				+         if (cell_coord(2,c)!=ncells) north_size = north_size + 
			
 
				+              face_size ;
			
 
				+      }
			
 
				+
			
 
				+      top_size = 0;
			
 
				+      bottom_size = 0;
			
 
				+      for (c = 1; c <= ncells; c++) {
			
 
				+         face_size = cell_size(1,c) * cell_size(2,c) * dim * 2;
			
 
				+         if (cell_coord(3,c)!=1) bottom_size = bottom_size + 
			
 
				+              face_size;
			
 
				+         if (cell_coord(3,c)!=ncells) top_size = top_size +
			
 
				+              face_size     ;
			
 
				+      }
			
 
				+
			
 
				+      start_send_west   = 1;
			
 
				+      start_send_east   = start_send_west   + west_size;
			
 
				+      start_send_south  = start_send_east   + east_size;
			
 
				+      start_send_north  = start_send_south  + south_size;
			
 
				+      start_send_bottom = start_send_north  + north_size;
			
 
				+      start_send_top    = start_send_bottom + bottom_size;
			
 
				+      start_recv_west   = 1;
			
 
				+      start_recv_east   = start_recv_west   + west_size;
			
 
				+      start_recv_south  = start_recv_east   + east_size;
			
 
				+      start_recv_north  = start_recv_south  + south_size;
			
 
				+      start_recv_bottom = start_recv_north  + north_size;
			
 
				+      start_recv_top    = start_recv_bottom + bottom_size;
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/error.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/error.c.svn-base
@@ -0,0 +1,121 @@
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include <math.h>
			
 
				+#include "header.h"
			
 
				+#include "mpinpb.h"
			
 
				+#include "applu_macros.h"
			
 
				+
			
 
				+#define u_exact(m) u_exact[m-1] // 1-based accessor: u_exact(1) maps to u_exact[0]
			
 
				+#define rms(m) rms[m-1] // 1-based accessor; NOTE(review): m is not parenthesized in the expansion, so pass only simple index expressions
			
 
				+#define rms_work(m) rms_work[m-1] // 1-based accessor for the rms_work arrays declared in the functions below
			
 
				+
			
 
				+void error_norm(double rms[]) { // rms: output, written through the 1-based rms(m) accessor for m = 1..5
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     this function computes, for each of the 5 solution components, the
			
 
				+//     norm of the difference between the computed solution u and exact_solution()
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int c, i, j, k, m, ii, jj, kk, d, error; // NOTE(review): error appears unused in this function
			
 
				+      double xi, eta, zeta, u_exact[5], rms_work[5],
			
 
				+           add;
			
 
				+
			
 
				+      for (m = 1; m <= 5; m++) {
			
 
				+         rms_work(m) = 0.0e0; // clear the local sums of squares
			
 
				+      }
			
 
				+
			
 
				+      for (c = 1; c <= ncells; c++) {
			
 
				+         kk = 0; // kk, jj, ii count from 0 inside the cell and index u; k, j, i run from cell_low to cell_high
			
 
				+         for (k = cell_low(3,c); k <= cell_high(3,c); k++) {
			
 
				+            zeta = (double)(k) * dnzm1;
			
 
				+            jj = 0;
			
 
				+            for (j = cell_low(2,c); j <= cell_high(2,c); j++) {
			
 
				+               eta = (double)(j) * dnym1;
			
 
				+               ii = 0;
			
 
				+               for (i = cell_low(1,c); i <= cell_high(1,c); i++) {
			
 
				+                  xi = (double)(i) * dnxm1;
			
 
				+                  exact_solution(xi, eta, zeta, u_exact); // fills u_exact(1..5) for the point (xi, eta, zeta)
			
 
				+
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     add = u(m,ii,jj,kk,c)-u_exact(m);
			
 
				+                     rms_work(m) = rms_work(m) + add*add; // accumulate the squared difference
			
 
				+                  }
			
 
				+                  ii = ii + 1;
			
 
				+               }
			
 
				+               jj = jj + 1;
			
 
				+            }
			
 
				+            kk = kk + 1;
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+      RCCE_allreduce((char*)rms_work, (char*)rms, 5, RCCE_DOUBLE, RCCE_SUM, RCCE_COMM_WORLD); // reduce 5 doubles from rms_work into rms with RCCE_SUM; NOTE(review): the return value is not checked
			
 
				+
			
 
				+      for (m = 1; m <= 5; m++) {
			
 
				+         for (d = 1; d <= 3; d++) {
			
 
				+            rms(m) = rms(m) / (double)(grid_points(d)-2); // divide by (grid_points(d)-2) once per dimension d
			
 
				+         }
			
 
				+         rms(m) = sqrt(rms(m));
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+void rhs_norm(double rms[]) { // rms: output, written through the 1-based rms(m) accessor for m = 1..5
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     this function computes, for each of the 5 components, the norm of rhs
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int c, i, j, k, d, m, error; // NOTE(review): error appears unused in this function
			
 
				+      double rms_work[5], add;
			
 
				+
			
 
				+      for (m = 1; m <= 5; m++) {
			
 
				+         rms_work(m) = 0.0e0; // clear the local sums of squares
			
 
				+      }
			
 
				+
			
 
				+      for (c = 1; c <= ncells; c++) {
			
 
				+         for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) { // each index runs from start(d,c) to cell_size(d,c)-end(d,c)-1
			
 
				+            for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     add = rhs(m,i,j,k,c);
			
 
				+                     rms_work(m) = rms_work(m) + add*add; // accumulate the square of rhs
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+      RCCE_allreduce((char*)rms_work, (char*)rms, 5, RCCE_DOUBLE, RCCE_SUM, RCCE_COMM_WORLD); // reduce 5 doubles from rms_work into rms with RCCE_SUM; NOTE(review): the return value is not checked
			
 
				+
			
 
				+      for (m = 1; m <= 5; m++) {
			
 
				+         for (d = 1; d <= 3; d++) {
			
 
				+            rms(m) = rms(m) / (double)(grid_points(d)-2); // divide by (grid_points(d)-2) once per dimension d
			
 
				+         }
			
 
				+         rms(m) = sqrt(rms(m));
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/exact_rhs.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/exact_rhs.c.svn-base
@@ -0,0 +1,375 @@
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include "header.h"
			
 
				+
			
 
				+void exact_rhs() { // no parameters; works on names declared outside this function (forcing, ue, buf, cuf, q, ...)
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     compute the right hand side (forcing) from exact_solution() values
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      double dtemp[5], xi, eta, zeta, dtpp; // dtemp: the 5 values filled by exact_solution; dtpp: 1/dtemp(1)
			
 
				+      int          c, m, i, j, k, ip1, im1, jp1, 
			
 
				+           jm1, km1, kp1;
			
 
				+#define dtemp(m) dtemp[m-1] // 1-based accessor: dtemp(1) maps to dtemp[0]
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     loop over all cells owned by this node
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (c = 1; c <= ncells; c++) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     initialize: set forcing to zero at every point of the cell
			
 
				+//---------------------------------------------------------------------
			
 
				+         for (k = 0; k <= cell_size(3,c)-1; k++) {
			
 
				+            for (j = 0; j <= cell_size(2,c)-1; j++) {
			
 
				+               for (i = 0; i <= cell_size(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     forcing(m,i,j,k,c) = 0.0e0;
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     xi-direction flux differences (line buffers ue, buf, cuf, q indexed by i)
			
 
				+//---------------------------------------------------------------------
			
 
				+         for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {
			
 
				+            zeta = (double)(k+cell_low(3,c)) * dnzm1;
			
 
				+            for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
			
 
				+               eta = (double)(j+cell_low(2,c)) * dnym1;
			
 
				+
			
 
				+               for (i = -2*(1-start(1,c)); i <= cell_size(1,c)+1-2*end(1,c); i++) { // wider range than the update loops below, which read neighbors up to i-2 and i+2
			
 
				+                  xi = (double)(i+cell_low(1,c)) * dnxm1;
			
 
				+
			
 
				+                  exact_solution(xi, eta, zeta, dtemp);
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     ue(i,m) = dtemp(m); // keep the exact values for this point of the line
			
 
				+                  }
			
 
				+
			
 
				+                  dtpp = 1.0e0 / dtemp(1); // reciprocal of the first component
			
 
				+
			
 
				+                  for (m = 2; m <= 5; m++) {
			
 
				+                     buf(i,m) = dtpp * dtemp(m); // components 2..5 divided by component 1
			
 
				+                  }
			
 
				+
			
 
				+                  cuf(i)   = buf(i,2) * buf(i,2);
			
 
				+                  buf(i,1) = cuf(i) + buf(i,3) * buf(i,3) + 
			
 
				+                       buf(i,4) * buf(i,4) ;
			
 
				+                  q(i) = 0.5e0*(buf(i,2)*ue(i,2) + buf(i,3)*ue(i,3) +
			
 
				+                       buf(i,4)*ue(i,4));
			
 
				+
			
 
				+               }
			
 
				+               
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) { // each update reads the line buffers at i-1, i and i+1
			
 
				+                  im1 = i-1;
			
 
				+                  ip1 = i+1;
			
 
				+
			
 
				+                  forcing(1,i,j,k,c) = forcing(1,i,j,k,c) -
			
 
				+                       tx2*( ue(ip1,2)-ue(im1,2) )+
			
 
				+                       dx1tx1*(ue(ip1,1)-2.0e0*ue(i,1)+ue(im1,1));
			
 
				+
			
 
				+                  forcing(2,i,j,k,c) = forcing(2,i,j,k,c) - tx2 * (
			
 
				+                       (ue(ip1,2)*buf(ip1,2)+c2*(ue(ip1,5)-q(ip1)))-
			
 
				+                       (ue(im1,2)*buf(im1,2)+c2*(ue(im1,5)-q(im1))))+
			
 
				+                       xxcon1*(buf(ip1,2)-2.0e0*buf(i,2)+buf(im1,2))+
			
 
				+                       dx2tx1*( ue(ip1,2)-2.0e0* ue(i,2)+ue(im1,2));
			
 
				+
			
 
				+                  forcing(3,i,j,k,c) = forcing(3,i,j,k,c) - tx2 * (
			
 
				+                       ue(ip1,3)*buf(ip1,2)-ue(im1,3)*buf(im1,2))+
			
 
				+                       xxcon2*(buf(ip1,3)-2.0e0*buf(i,3)+buf(im1,3))+
			
 
				+                       dx3tx1*( ue(ip1,3)-2.0e0*ue(i,3) +ue(im1,3));
			
 
				+                  
			
 
				+                  forcing(4,i,j,k,c) = forcing(4,i,j,k,c) - tx2*(
			
 
				+                       ue(ip1,4)*buf(ip1,2)-ue(im1,4)*buf(im1,2))+
			
 
				+                       xxcon2*(buf(ip1,4)-2.0e0*buf(i,4)+buf(im1,4))+
			
 
				+                       dx4tx1*( ue(ip1,4)-2.0e0* ue(i,4)+ ue(im1,4));
			
 
				+
			
 
				+                  forcing(5,i,j,k,c) = forcing(5,i,j,k,c) - tx2*(
			
 
				+                       buf(ip1,2)*(c1*ue(ip1,5)-c2*q(ip1))-
			
 
				+                       buf(im1,2)*(c1*ue(im1,5)-c2*q(im1)))+
			
 
				+                       0.5e0*xxcon3*(buf(ip1,1)-2.0e0*buf(i,1)+
			
 
				+                       buf(im1,1))+
			
 
				+                       xxcon4*(cuf(ip1)-2.0e0*cuf(i)+cuf(im1))+
			
 
				+                       xxcon5*(buf(ip1,5)-2.0e0*buf(i,5)+buf(im1,5))+
			
 
				+                       dx5tx1*( ue(ip1,5)-2.0e0* ue(i,5)+ ue(im1,5));
			
 
				+               }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     Fourth-order dissipation along i, scaled by dssp
			
 
				+//---------------------------------------------------------------------
			
 
				+               if (start(1,c) > 0) { // shortened stencils at i = 1 and i = 2: the terms below index 1 are omitted
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     i = 1;
			
 
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
			
 
				+                          (5.0e0*ue(i,m) - 4.0e0*ue(i+1,m) +ue(i+2,m));
			
 
				+                     i = 2;
			
 
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
			
 
				+                          (-4.0e0*ue(i-1,m) + 6.0e0*ue(i,m) -
			
 
				+                          4.0e0*ue(i+1,m) +       ue(i+2,m));
			
 
				+                  }
			
 
				+               }
			
 
				+
			
 
				+               for (i = start(1,c)*3; i <= cell_size(1,c)-3*end(1,c)-1; i++) { // full five-point stencil with weights 1, -4, 6, -4, 1
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp*
			
 
				+                          (ue(i-2,m) - 4.0e0*ue(i-1,m) +
			
 
				+                          6.0e0*ue(i,m) - 4.0e0*ue(i+1,m) + ue(i+2,m));
			
 
				+                  }
			
 
				+               }
			
 
				+
			
 
				+               if (end(1,c) > 0) { // shortened stencils at i = cell_size(1,c)-3 and cell_size(1,c)-2
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     i = cell_size(1,c)-3;
			
 
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
			
 
				+                          (ue(i-2,m) - 4.0e0*ue(i-1,m) +
			
 
				+                          6.0e0*ue(i,m) - 4.0e0*ue(i+1,m));
			
 
				+                     i = cell_size(1,c)-2;
			
 
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
			
 
				+                          (ue(i-2,m) - 4.0e0*ue(i-1,m) + 5.0e0*ue(i,m));
			
 
				+                  }
			
 
				+               }
			
 
				+
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     eta-direction flux differences (line buffers ue, buf, cuf, q indexed by j)
			
 
				+//---------------------------------------------------------------------
			
 
				+         for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {
			
 
				+            zeta = (double)(k+cell_low(3,c)) * dnzm1;
			
 
				+            for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+               xi = (double)(i+cell_low(1,c)) * dnxm1;
			
 
				+
			
 
				+               for (j = -2*(1-start(2,c)); j <= cell_size(2,c)+1-2*end(2,c); j++) { // wider range than the update loops below, which read neighbors up to j-2 and j+2
			
 
				+                  eta = (double)(j+cell_low(2,c)) * dnym1;
			
 
				+
			
 
				+                  exact_solution(xi, eta, zeta, dtemp);
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     ue(j,m) = dtemp(m);
			
 
				+                  }
			
 
				+                  
			
 
				+                  dtpp = 1.0e0/dtemp(1);
			
 
				+
			
 
				+                  for (m = 2; m <= 5; m++) {
			
 
				+                     buf(j,m) = dtpp * dtemp(m);
			
 
				+                  }
			
 
				+
			
 
				+                  cuf(j)   = buf(j,3) * buf(j,3);
			
 
				+                  buf(j,1) = cuf(j) + buf(j,2) * buf(j,2) + 
			
 
				+                       buf(j,4) * buf(j,4);
			
 
				+                  q(j) = 0.5e0*(buf(j,2)*ue(j,2) + buf(j,3)*ue(j,3) +
			
 
				+                       buf(j,4)*ue(j,4));
			
 
				+               }
			
 
				+
			
 
				+               for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) { // each update reads the line buffers at j-1, j and j+1
			
 
				+                  jm1 = j-1;
			
 
				+                  jp1 = j+1;
			
 
				+                  
			
 
				+                  forcing(1,i,j,k,c) = forcing(1,i,j,k,c) -
			
 
				+                       ty2*( ue(jp1,3)-ue(jm1,3) )+
			
 
				+                       dy1ty1*(ue(jp1,1)-2.0e0*ue(j,1)+ue(jm1,1));
			
 
				+
			
 
				+                  forcing(2,i,j,k,c) = forcing(2,i,j,k,c) - ty2*(
			
 
				+                       ue(jp1,2)*buf(jp1,3)-ue(jm1,2)*buf(jm1,3))+
			
 
				+                       yycon2*(buf(jp1,2)-2.0e0*buf(j,2)+buf(jm1,2))+
			
 
				+                       dy2ty1*( ue(jp1,2)-2.0* ue(j,2)+ ue(jm1,2));
			
 
				+
			
 
				+                  forcing(3,i,j,k,c) = forcing(3,i,j,k,c) - ty2*(
			
 
				+                       (ue(jp1,3)*buf(jp1,3)+c2*(ue(jp1,5)-q(jp1)))-
			
 
				+                       (ue(jm1,3)*buf(jm1,3)+c2*(ue(jm1,5)-q(jm1))))+
			
 
				+                       yycon1*(buf(jp1,3)-2.0e0*buf(j,3)+buf(jm1,3))+
			
 
				+                       dy3ty1*( ue(jp1,3)-2.0e0*ue(j,3) +ue(jm1,3));
			
 
				+
			
 
				+                  forcing(4,i,j,k,c) = forcing(4,i,j,k,c) - ty2*(
			
 
				+                       ue(jp1,4)*buf(jp1,3)-ue(jm1,4)*buf(jm1,3))+
			
 
				+                       yycon2*(buf(jp1,4)-2.0e0*buf(j,4)+buf(jm1,4))+
			
 
				+                       dy4ty1*( ue(jp1,4)-2.0e0*ue(j,4)+ ue(jm1,4));
			
 
				+
			
 
				+                  forcing(5,i,j,k,c) = forcing(5,i,j,k,c) - ty2*(
			
 
				+                       buf(jp1,3)*(c1*ue(jp1,5)-c2*q(jp1))-
			
 
				+                       buf(jm1,3)*(c1*ue(jm1,5)-c2*q(jm1)))+
			
 
				+                       0.5e0*yycon3*(buf(jp1,1)-2.0e0*buf(j,1)+
			
 
				+                       buf(jm1,1))+
			
 
				+                       yycon4*(cuf(jp1)-2.0e0*cuf(j)+cuf(jm1))+
			
 
				+                       yycon5*(buf(jp1,5)-2.0e0*buf(j,5)+buf(jm1,5))+
			
 
				+                       dy5ty1*(ue(jp1,5)-2.0e0*ue(j,5)+ue(jm1,5));
			
 
				+               }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     Fourth-order dissipation along j, scaled by dssp
			
 
				+//---------------------------------------------------------------------
			
 
				+               if (start(2,c) > 0) { // shortened stencils at j = 1 and j = 2: the terms below index 1 are omitted
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     j = 1;
			
 
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
			
 
				+                          (5.0e0*ue(j,m) - 4.0e0*ue(j+1,m) +ue(j+2,m));
			
 
				+                     j = 2;
			
 
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
			
 
				+                          (-4.0e0*ue(j-1,m) + 6.0e0*ue(j,m) -
			
 
				+                          4.0e0*ue(j+1,m) +       ue(j+2,m));
			
 
				+                  }
			
 
				+               }
			
 
				+
			
 
				+               for (j = start(2,c)*3; j <= cell_size(2,c)-3*end(2,c)-1; j++) { // full five-point stencil with weights 1, -4, 6, -4, 1
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp*
			
 
				+                          (ue(j-2,m) - 4.0e0*ue(j-1,m) +
			
 
				+                          6.0e0*ue(j,m) - 4.0e0*ue(j+1,m) + ue(j+2,m));
			
 
				+                  }
			
 
				+               }
			
 
				+
			
 
				+               if (end(2,c) > 0) { // shortened stencils at j = cell_size(2,c)-3 and cell_size(2,c)-2
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     j = cell_size(2,c)-3;
			
 
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
			
 
				+                          (ue(j-2,m) - 4.0e0*ue(j-1,m) +
			
 
				+                          6.0e0*ue(j,m) - 4.0e0*ue(j+1,m));
			
 
				+                     j = cell_size(2,c)-2;
			
 
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
			
 
				+                          (ue(j-2,m) - 4.0e0*ue(j-1,m) + 5.0e0*ue(j,m));
			
 
				+
			
 
				+                  }
			
 
				+               }
			
 
				+
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     zeta-direction flux differences (line buffers ue, buf, cuf, q indexed by k)
			
 
				+//---------------------------------------------------------------------
			
 
				+         for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
			
 
				+            eta = (double)(j+cell_low(2,c)) * dnym1;
			
 
				+            for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+               xi = (double)(i+cell_low(1,c)) * dnxm1;
			
 
				+
			
 
				+               for (k = -2*(1-start(3,c)); k <= cell_size(3,c)+1-2*end(3,c); k++) { // wider range than the update loops below, which read neighbors up to k-2 and k+2
			
 
				+                  zeta = (double)(k+cell_low(3,c)) * dnzm1;
			
 
				+
			
 
				+                  exact_solution(xi, eta, zeta, dtemp);
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     ue(k,m) = dtemp(m);
			
 
				+                  }
			
 
				+
			
 
				+                  dtpp = 1.0e0/dtemp(1);
			
 
				+
			
 
				+                  for (m = 2; m <= 5; m++) {
			
 
				+                     buf(k,m) = dtpp * dtemp(m);
			
 
				+                  }
			
 
				+
			
 
				+                  cuf(k)   = buf(k,4) * buf(k,4);
			
 
				+                  buf(k,1) = cuf(k) + buf(k,2) * buf(k,2) + 
			
 
				+                       buf(k,3) * buf(k,3);
			
 
				+                  q(k) = 0.5e0*(buf(k,2)*ue(k,2) + buf(k,3)*ue(k,3) +
			
 
				+                       buf(k,4)*ue(k,4));
			
 
				+               }
			
 
				+
			
 
				+               for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) { // each update reads the line buffers at k-1, k and k+1
			
 
				+                  km1 = k-1;
			
 
				+                  kp1 = k+1;
			
 
				+                  
			
 
				+                  forcing(1,i,j,k,c) = forcing(1,i,j,k,c) -
			
 
				+                       tz2*( ue(kp1,4)-ue(km1,4) )+
			
 
				+                       dz1tz1*(ue(kp1,1)-2.0e0*ue(k,1)+ue(km1,1));
			
 
				+
			
 
				+                  forcing(2,i,j,k,c) = forcing(2,i,j,k,c) - tz2 * (
			
 
				+                       ue(kp1,2)*buf(kp1,4)-ue(km1,2)*buf(km1,4))+
			
 
				+                       zzcon2*(buf(kp1,2)-2.0e0*buf(k,2)+buf(km1,2))+
			
 
				+                       dz2tz1*( ue(kp1,2)-2.0e0* ue(k,2)+ ue(km1,2));
			
 
				+
			
 
				+                  forcing(3,i,j,k,c) = forcing(3,i,j,k,c) - tz2 * (
			
 
				+                       ue(kp1,3)*buf(kp1,4)-ue(km1,3)*buf(km1,4))+
			
 
				+                       zzcon2*(buf(kp1,3)-2.0e0*buf(k,3)+buf(km1,3))+
			
 
				+                       dz3tz1*(ue(kp1,3)-2.0e0*ue(k,3)+ue(km1,3));
			
 
				+
			
 
				+                  forcing(4,i,j,k,c) = forcing(4,i,j,k,c) - tz2 * (
			
 
				+                       (ue(kp1,4)*buf(kp1,4)+c2*(ue(kp1,5)-q(kp1)))-
			
 
				+                       (ue(km1,4)*buf(km1,4)+c2*(ue(km1,5)-q(km1))))+
			
 
				+                       zzcon1*(buf(kp1,4)-2.0e0*buf(k,4)+buf(km1,4))+
			
 
				+                       dz4tz1*( ue(kp1,4)-2.0e0*ue(k,4) +ue(km1,4));
			
 
				+
			
 
				+                  forcing(5,i,j,k,c) = forcing(5,i,j,k,c) - tz2 * (
			
 
				+                       buf(kp1,4)*(c1*ue(kp1,5)-c2*q(kp1))-
			
 
				+                       buf(km1,4)*(c1*ue(km1,5)-c2*q(km1)))+
			
 
				+                       0.5e0*zzcon3*(buf(kp1,1)-2.0e0*buf(k,1)
			
 
				+                       +buf(km1,1))+
			
 
				+                       zzcon4*(cuf(kp1)-2.0e0*cuf(k)+cuf(km1))+
			
 
				+                       zzcon5*(buf(kp1,5)-2.0e0*buf(k,5)+buf(km1,5))+
			
 
				+                       dz5tz1*( ue(kp1,5)-2.0e0*ue(k,5)+ ue(km1,5));
			
 
				+               }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     Fourth-order dissipation along k, scaled by dssp
			
 
				+//---------------------------------------------------------------------
			
 
				+               if (start(3,c) > 0) { // shortened stencils at k = 1 and k = 2: the terms below index 1 are omitted
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     k = 1;
			
 
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
			
 
				+                          (5.0e0*ue(k,m) - 4.0e0*ue(k+1,m) +ue(k+2,m));
			
 
				+                     k = 2;
			
 
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
			
 
				+                          (-4.0e0*ue(k-1,m) + 6.0e0*ue(k,m) -
			
 
				+                          4.0e0*ue(k+1,m) +       ue(k+2,m));
			
 
				+                  }
			
 
				+               }
			
 
				+
			
 
				+               for (k = start(3,c)*3; k <= cell_size(3,c)-3*end(3,c)-1; k++) { // full five-point stencil with weights 1, -4, 6, -4, 1
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp*
			
 
				+                          (ue(k-2,m) - 4.0e0*ue(k-1,m) +
			
 
				+                          6.0e0*ue(k,m) - 4.0e0*ue(k+1,m) + ue(k+2,m));
			
 
				+                  }
			
 
				+               }
			
 
				+
			
 
				+               if (end(3,c) > 0) { // shortened stencils at k = cell_size(3,c)-3 and cell_size(3,c)-2
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     k = cell_size(3,c)-3;
			
 
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
			
 
				+                          (ue(k-2,m) - 4.0e0*ue(k-1,m) +
			
 
				+                          6.0e0*ue(k,m) - 4.0e0*ue(k+1,m));
			
 
				+                     k = cell_size(3,c)-2;
			
 
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
			
 
				+                          (ue(k-2,m) - 4.0e0*ue(k-1,m) + 5.0e0*ue(k,m));
			
 
				+                  }
			
 
				+               }
			
 
				+
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     now change the sign of the forcing function at the points updated above
			
 
				+//---------------------------------------------------------------------
			
 
				+         for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {
			
 
				+            for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     forcing(m,i,j,k,c) = -1.e0 * forcing(m,i,j,k,c);
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/exact_solution.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/exact_solution.c.svn-base
@@ -0,0 +1,43 @@
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include "header.h"
			
 
				+
			
 
				+void exact_solution(double xi,double eta,double zeta,double dtemp[]) { // dtemp: output, 5 values written as dtemp[0..4]
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     this function writes the exact solution at point xi, eta, zeta into dtemp
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int m;
			
 
				+#define dtemp(m) dtemp[m-1] // 1-based accessor: dtemp(1) maps to dtemp[0]
			
 
				+
			
 
				+      for (m = 1; m <= 5; m++) { // one value per component m, built from the coefficients ce(m,1..13)
			
 
				+         dtemp(m) =  ce(m,1) +
			
 
				+           xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
			
 
				+           eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
			
 
				+           zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) + 
			
 
				+           zeta*ce(m,13)))); // constant ce(m,1) plus a degree-4 polynomial in each of xi, eta, zeta, in nested (Horner) form
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/header.h.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/header.h.svn-base
@@ -0,0 +1,287 @@
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+//
			
 
				+//  header.h
			
 
				+//
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+#ifndef __HEADER_H
			
 
				+#define __HEADER_H
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+// The following include file is generated automatically by the
			
 
				+// "setparams" utility. It defines 
			
 
				+//      maxcells:      the square root of the maximum number of processors
			
 
				+//      problem_size:  12, 64, 102, 162 (for class T, A, B, C)
			
 
				+//      dt_default:    default time step for this problem size if no
			
 
				+//                     config file
			
 
				+//      niter_default: default number of iterations for this problem size
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+#include "npbparams.h"
			
 
				+#include "RCCE.h"
			
 
				+//we introduce the next definition to avoid confusing the compiler, which
			
 
				+//sometimes thinks the variable class is a reserved word
			
 
				+#define class _class_
			
 
				+#include "../common/common.h"
			
 
				+
			
 
				//---------------------------------------------------------------------
				// Named constants
				//---------------------------------------------------------------------
				#define AA 0
				#define BB 1
				#define CC 2
				#define BLOCK_SIZE 5

				// One constant per face direction.
				// NOTE(review): presumably message tags for the face exchanges --
				// confirm against the callers.
				#define EAST   2000
				#define WEST   3000
				#define NORTH  4000
				#define SOUTH  5000
				#define BOTTOM 6000
				#define TOP    7000

				// Direction numbers 0..5.
				#define WESTDIR   0
				#define EASTDIR   1
				#define SOUTHDIR  2
				#define NORTHDIR  3
				#define BOTTOMDIR 4
				#define TOPDIR    5

				// Extent used to dimension the per-cell arrays in each direction.
				#define MAX_CELL_DIM ((PROBLEM_SIZE/MAXCELLS)+1)
				#define IMAX MAX_CELL_DIM
				#define JMAX MAX_CELL_DIM
				#define KMAX MAX_CELL_DIM

				// Number of doubles in in_buffer and out_buffer.
				#define BUF_SIZE (MAX_CELL_DIM*MAX_CELL_DIM*(MAXCELLS-1)*60+1)

				// Square of x.  The whole expansion is parenthesized so that it stays
				// a single operand: a/SQR(b) is a/((b)*(b)), not (a/b)*b.  Note that x
				// is evaluated twice, so it must not have side effects.
				#define SQR(x) ((x)*(x))

				//---------------------------------------------------------------------
				// Accessor macros.  Each one maps Fortran-style subscripts onto the
				// flat C array of the same name, with the first subscript varying
				// fastest.  Every parameter is parenthesized so that any expression
				// can be passed as a subscript.
				//---------------------------------------------------------------------

				// 1-based subscripts
				#define grid_points(m) grid_points[(m)-1]
				#define ce(m,n) ce[((m)-1)+5*((n)-1)]
				#define cell_coord(m,n) cell_coord[((m)-1)+3*((n)-1)]
				#define cell_low(m,n) cell_low[((m)-1)+3*((n)-1)]
				#define cell_high(m,n) cell_high[((m)-1)+3*((n)-1)]
				#define cell_size(m,n) cell_size[((m)-1)+3*((n)-1)]
				#define predecessor(m) predecessor[(m)-1]
				#define slice(m,n) slice[((m)-1)+3*((n)-1)]
				#define grid_size(m) grid_size[(m)-1]
				#define successor(m) successor[(m)-1]
				#define start(m,n) start[((m)-1)+3*((n)-1)]
				#define end(m,n) end[((m)-1)+3*((n)-1)]

				// i, j, k start at -1 (offset +1, extent MAX+2); c is 1-based
				#define us(i,j,k,c) us[((i)+1)+(IMAX+2)*(((j)+1)+(JMAX+2)*(((k)+1)+(KMAX+2)*((c)-1)))]
				#define vs(i,j,k,c) vs[((i)+1)+(IMAX+2)*(((j)+1)+(JMAX+2)*(((k)+1)+(KMAX+2)*((c)-1)))]
				#define ws(i,j,k,c) ws[((i)+1)+(IMAX+2)*(((j)+1)+(JMAX+2)*(((k)+1)+(KMAX+2)*((c)-1)))]
				#define qs(i,j,k,c) qs[((i)+1)+(IMAX+2)*(((j)+1)+(JMAX+2)*(((k)+1)+(KMAX+2)*((c)-1)))]
				#define rho_i(i,j,k,c) rho_i[((i)+1)+(IMAX+2)*(((j)+1)+(JMAX+2)*(((k)+1)+(KMAX+2)*((c)-1)))]
				#define square(i,j,k,c) square[((i)+1)+(IMAX+2)*(((j)+1)+(JMAX+2)*(((k)+1)+(KMAX+2)*((c)-1)))]

				// i, j, k start at 0 (extent MAX); m and c are 1-based
				#define forcing(m,i,j,k,c) forcing[((m)-1)+5*((i)+IMAX*((j)+JMAX*((k)+KMAX*((c)-1))))]

				// i, j, k start at -2 (offset +2, extent MAX+4); m and c are 1-based
				#define u(m,i,j,k,c) u[((m)-1)+5*(((i)+2)+(IMAX+4)*(((j)+2)+(JMAX+4)*(((k)+2)+(KMAX+4)*((c)-1))))]

				// i, j, k start at -1 (offset +1, extent MAX+1); m, n and c are 1-based
				#define rhs(m,i,j,k,c) rhs[((m)-1)+5*(((i)+1)+(IMAX+1)*(((j)+1)+(JMAX+1)*(((k)+1)+(KMAX+1)*((c)-1))))]
				#define lhsc(m,n,i,j,k,c) lhsc[((m)-1)+5*(((n)-1)+5*(((i)+1)+(IMAX+1)*(((j)+1)+(JMAX+1)*(((k)+1)+(KMAX+1)*((c)-1)))))]

				// i, j start at 0 (extent MAX+1); m and c are 1-based
				#define backsub_info(m,i,j,c) backsub_info[((m)-1)+5*((i)+(IMAX+1)*((j)+(JMAX+1)*((c)-1)))]

				// 1-based subscripts
				#define in_buffer(i) in_buffer[(i)-1]
				#define out_buffer(i) out_buffer[(i)-1]

				// m starts at -2 (offset +2)
				#define cv(m) cv[(m)+2]
				#define rhon(m) rhon[(m)+2]
				#define rhos(m) rhos[(m)+2]
				#define rhoq(m) rhoq[(m)+2]
				#define cuf(m) cuf[(m)+2]
				#define q(m) q[(m)+2]

				// m starts at -2 (offset +2, extent MAX_CELL_DIM+4); n is 1-based
				#define ue(m,n) ue[((m)+2)+(MAX_CELL_DIM+4)*((n)-1)]
				#define buf(m,n) buf[((m)+2)+(MAX_CELL_DIM+4)*((n)-1)]

				// 1-based subscripts
				#define sum(m) sum[(m)-1]
				#define xce_sub(m) xce_sub[(m)-1]
			
 
				+
			
 
				+
			
 
				+#ifdef G_MAIN
			
 
				+      int     ncells, grid_points[3];
			
 
				+      double  elapsed_time;
			
 
				+
			
 
				+      double  tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3, 
			
 
				+                        dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4, 
			
 
				+                        dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt, 
			
 
				+                        ce[5*13], dxmax, dymax, dzmax, xxcon1, xxcon2, 
			
 
				+                        xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1,
			
 
				+                        dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4,
			
 
				+                        yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1,
			
 
				+                        zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1, 
			
 
				+                        dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1, 
			
 
				+                        dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2, 
			
 
				+                        c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt,
			
 
				+                        dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1, 
			
 
				+                        c2dtty1, c2dttz1, comz1, comz4, comz5, comz6, 
			
 
				+                        c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16;
			
 
				+
			
 
				+      int     cell_coord[MAXCELLS*3], cell_low[MAXCELLS*3], 
			
 
				+              cell_high[MAXCELLS*3],  cell_size[MAXCELLS*3],
			
 
				+              predecessor[3],         slice[MAXCELLS*3],
			
 
				+              grid_size[3],           successor[3],
			
 
				+              start[MAXCELLS*3],      end[MAXCELLS*3];
			
 
				+
			
 
				+      double 
			
 
				+         us      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         vs      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         ws      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         qs      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         rho_i   [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         square  [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         forcing [5*IMAX*JMAX*KMAX*MAXCELLS],
			
 
				+         u       [5*(IMAX+4)*(JMAX+4)*(KMAX+4)*MAXCELLS],
			
 
				+         rhs     [5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS],
			
 
				+         lhsc    [5*5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS],
			
 
				+         backsub_info [5*(MAX_CELL_DIM+1)*(MAX_CELL_DIM+1)*MAXCELLS],
			
 
				+         in_buffer[BUF_SIZE], out_buffer[BUF_SIZE];
			
 
				+
			
 
				+      double cv[MAX_CELL_DIM+4],   rhon[MAX_CELL_DIM+4],
			
 
				+             rhos[MAX_CELL_DIM+4], rhoq[MAX_CELL_DIM+4],
			
 
				+             cuf[MAX_CELL_DIM+4],  q[MAX_CELL_DIM+4],
			
 
				+             ue[(MAX_CELL_DIM+4)*5], buf[(MAX_CELL_DIM+4)*5];
			
 
				+
			
 
				+      int  west_size, east_size, bottom_size, top_size,
			
 
				+               north_size, south_size, start_send_west, 
			
 
				+               start_send_east, start_send_south, start_send_north,
			
 
				+               start_send_bottom, start_send_top, start_recv_west,
			
 
				+               start_recv_east, start_recv_south, start_recv_north,
			
 
				+               start_recv_bottom, start_recv_top;
			
 
				+//
			
 
				+//     These are used by btio
			
 
				+//
			
 
				+      int collbuf_nodes, collbuf_size, iosize,
			
 
				+              idump, record_length,
			
 
				+              idump_sub, rd_interval;
			
 
				+      double sum[NITER_DEFAULT], xce_sub[5];
			
 
				+      long int iseek;
			
 
				+      int    send_color[6], recv_color[6];
			
 
				+#else
			
 
				+extern int     ncells, grid_points[3];
			
 
				+extern double  elapsed_time;
			
 
				+
			
 
				+extern double  tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3, 
			
 
				+                        dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4, 
			
 
				+                        dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt, 
			
 
				+                        ce[5*13], dxmax, dymax, dzmax, xxcon1, xxcon2, 
			
 
				+                        xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1,
			
 
				+                        dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4,
			
 
				+                        yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1,
			
 
				+                        zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1, 
			
 
				+                        dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1, 
			
 
				+                        dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2, 
			
 
				+                        c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt,
			
 
				+                        dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1, 
			
 
				+                        c2dtty1, c2dttz1, comz1, comz4, comz5, comz6, 
			
 
				+                        c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16;
			
 
				+
			
 
				+extern int    cell_coord[MAXCELLS*3], cell_low[MAXCELLS*3], 
			
 
				+              cell_high[MAXCELLS*3],  cell_size[MAXCELLS*3],
			
 
				+              predecessor[3],         slice[MAXCELLS*3],
			
 
				+              grid_size[3],           successor[3],
			
 
				+              start[MAXCELLS*3],      end[MAXCELLS*3];
			
 
				+
			
 
				+extern double 
			
 
				+         us      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         vs      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         ws      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         qs      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         rho_i   [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         square  [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         forcing [5*IMAX*JMAX*KMAX*MAXCELLS],
			
 
				+         u       [5*(IMAX+4)*(JMAX+4)*(KMAX+4)*MAXCELLS],
			
 
				+         rhs     [5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS],
			
 
				+         lhsc    [5*5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS],
			
 
				+         backsub_info [5*(MAX_CELL_DIM+1)*(MAX_CELL_DIM+1)*MAXCELLS],
			
 
				+         in_buffer[BUF_SIZE], out_buffer[BUF_SIZE];
			
 
				+
			
 
				+extern double cv[MAX_CELL_DIM+4],   rhon[MAX_CELL_DIM+4],
			
 
				+             rhos[MAX_CELL_DIM+4], rhoq[MAX_CELL_DIM+4],
			
 
				+             cuf[MAX_CELL_DIM+4],  q[MAX_CELL_DIM+4],
			
 
				+             ue[(MAX_CELL_DIM+4)*5], buf[(MAX_CELL_DIM+4)*5];
			
 
				+
			
 
				+extern int  west_size, east_size, bottom_size, top_size,
			
 
				+               north_size, south_size, start_send_west, 
			
 
				+               start_send_east, start_send_south, start_send_north,
			
 
				+               start_send_bottom, start_send_top, start_recv_west,
			
 
				+               start_recv_east, start_recv_south, start_recv_north,
			
 
				+               start_recv_bottom, start_recv_top;
			
 
				+
			
 
				+//
			
 
				+//     These are used by btio
			
 
				+//
			
 
				+extern int collbuf_nodes, collbuf_size, iosize,
			
 
				+              idump, record_length,
			
 
				+              idump_sub, rd_interval;
			
 
				+extern double sum[NITER_DEFAULT], xce_sub[5];
			
 
				+extern long int iseek;
			
 
				+extern int    send_color[6], recv_color[6];
			
 
				+
			
 
				+#endif /*G_MAIN*/
			
 
				+
			
 
				//---------------------------------------------------------------------
				// Prototypes of the routines shared between the BT source files.
				// (Function declarations have external linkage by default, so no
				// storage-class keyword is needed.)
				//---------------------------------------------------------------------

				// block helpers and the exact solution
				void matvec_sub(double ablock[], double avec[], double bvec[]);
				void matmul_sub(double ablock[], double bblock[], double cblock[]);
				void binvcrhs( double lhs[], double c[], double r[] );
				void binvrhs( double lhs[], double r[] );
				void exact_solution(double xi,double eta,double zeta,double dtemp[]);

				// set-up, time stepping and verification
				int  setup_mpi(int *argc, char ***argv);
				void make_set(void);
				void set_constants(void);
				void lhsinit(void);
				void lhsabinit(double lhsa[], double lhsb[], int size);
				void initialize(void);
				void exact_rhs(void);
				void compute_buffer_size(int c);
				void adi(void);
				void compute_rhs(void);
				void copy_faces(void);
				void x_solve(void);
				void y_solve(void);
				void z_solve(void);
				void add(void);
				void verify(int niter, char *class, int *verified);
				void error_norm(double rms[]);
				void rhs_norm(double rms[]);

				// btio
				void setup_btio(void);
				void output_timestep(void);
				void btio_cleanup(void);
				void btio_verify(int *verified);
				void accumulate_norms(double xce[]);
				void clear_timestep(void);
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				//---------------------------------------------------------------------
				// When compiled with OpenMP, every global declared by this header is
				// made threadprivate, so each thread works on its own copy.
				//---------------------------------------------------------------------
				#ifdef _OPENMP

				// cell layout
				#pragma omp threadprivate (cell_coord, cell_low, cell_high, cell_size, \
				                           predecessor, slice, grid_size, successor, \
				                           start, end)

				#pragma omp threadprivate (ncells, grid_points, elapsed_time)

				// scalar constants and the ce coefficient table
				#pragma omp threadprivate (tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3, \
				                           dx1, dx2, dx3, dx4, dx5, \
				                           dy1, dy2, dy3, dy4, dy5, \
				                           dz1, dz2, dz3, dz4, dz5, \
				                           dssp, dt, ce, dxmax, dymax, dzmax, \
				                           xxcon1, xxcon2, xxcon3, xxcon4, xxcon5, \
				                           dx1tx1, dx2tx1, dx3tx1, dx4tx1, dx5tx1, \
				                           yycon1, yycon2, yycon3, yycon4, yycon5, \
				                           dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1, \
				                           zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, \
				                           dz1tz1, dz2tz1, dz3tz1, dz4tz1, dz5tz1, \
				                           dnxm1, dnym1, dnzm1, \
				                           c1c2, c1c5, c3c4, c1345, conz1, \
				                           c1, c2, c3, c4, c5, \
				                           c4dssp, c5dssp, dtdssp, \
				                           dttx1, bt, dttx2, dtty1, dtty2, dttz1, dttz2, \
				                           c2dttx1, c2dtty1, c2dttz1, \
				                           comz1, comz4, comz5, comz6, \
				                           c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16)

				// field arrays and communication buffers
				#pragma omp threadprivate (us, vs, ws, qs, rho_i, square, forcing, \
				                           u, rhs, lhsc, backsub_info, \
				                           in_buffer, out_buffer)

				// one-dimensional work arrays
				#pragma omp threadprivate (cv, rhon, rhos, rhoq, cuf, q, ue, buf)

				// face sizes, buffer offsets and colors
				#pragma omp threadprivate (west_size, east_size, bottom_size, top_size, \
				                           north_size, south_size, \
				                           start_send_west, start_send_east, \
				                           start_send_south, start_send_north, \
				                           start_send_bottom, start_send_top, \
				                           start_recv_west, start_recv_east, \
				                           start_recv_south, start_recv_north, \
				                           start_recv_bottom, start_recv_top, \
				                           send_color, recv_color)
				//
				//     These are used by btio
				//
				#pragma omp threadprivate (collbuf_nodes, collbuf_size, iosize, idump, \
				                           record_length, idump_sub, rd_interval, \
				                           sum, xce_sub, iseek)
				#endif
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/initialize.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/initialize.c.svn-base
@@ -0,0 +1,321 @@
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include "header.h"
			
 
				+
			
 
				+void  initialize() {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     This subroutine initializes the field variable u using 
			
 
				+//     tri-linear transfinite interpolation of the boundary values     
			
 
				+//---------------------------------------------------------------------
			
 
				+      
			
 
				+      int c, i, j, k, m, ii, jj, kk, ix, iy, iz;
			
 
				+      double xi, eta, zeta, Pface[5*3*2], Pxi, Peta, 
			
 
				+           Pzeta, temp[5];
			
 
				+#define Pface(m,n,i) Pface[(m-1)+5*((n-1)+3*(i-1))]
			
 
				+#define temp(m) temp[m-1]
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//  Later (in compute_rhs) we compute 1/u for every element. A few of 
			
 
				+//  the corner elements are not used, but it convenient (and faster) 
			
 
				+//  to compute the whole thing with a simple loop. Make sure those 
			
 
				+//  values are nonzero by initializing the whole thing here. 
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (c = 1; c <= ncells; c++) {
			
 
				+         for (kk = -1; kk <= KMAX; kk++) {
			
 
				+            for (jj = -1; jj <= JMAX; jj++) {
			
 
				+               for (ii = -1; ii <= IMAX; ii++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     u(m, ii, jj, kk, c) = 1.0;
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+      }
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     first store the "interpolated" values everywhere on the grid    
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (c = 1; c <= ncells; c++) {
			
 
				+         kk = 0;
			
 
				+         for (k = cell_low(3,c); k <= cell_high(3,c); k++) {
			
 
				+            zeta = (double)(k) * dnzm1;
			
 
				+            jj = 0;
			
 
				+            for (j = cell_low(2,c); j <= cell_high(2,c); j++) {
			
 
				+               eta = (double)(j) * dnym1;
			
 
				+               ii = 0;
			
 
				+               for (i = cell_low(1,c); i <= cell_high(1,c); i++) {
			
 
				+                  xi = (double)(i) * dnxm1;
			
 
				+                  
			
 
				+                  for (ix = 1; ix <= 2; ix++) {
			
 
				+                     exact_solution((double)(ix-1), eta, zeta, 
			
 
				+                          &Pface(1,1,ix));
			
 
				+                  }
			
 
				+
			
 
				+                  for (iy = 1; iy <= 2; iy++) {
			
 
				+                     exact_solution(xi, (double)(iy-1) , zeta, 
			
 
				+                          &Pface(1,2,iy));
			
 
				+                  }
			
 
				+
			
 
				+                  for (iz = 1; iz <= 2; iz++) {
			
 
				+                     exact_solution(xi, eta, (double)(iz-1),   
			
 
				+                          &Pface(1,3,iz));
			
 
				+                  }
			
 
				+
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     Pxi   = xi   * Pface(m,1,2) + 
			
 
				+                          (1.0e0-xi)   * Pface(m,1,1);
			
 
				+                     Peta  = eta  * Pface(m,2,2) + 
			
 
				+                          (1.0e0-eta)  * Pface(m,2,1);
			
 
				+                     Pzeta = zeta * Pface(m,3,2) + 
			
 
				+                          (1.0e0-zeta) * Pface(m,3,1);
			
 
				+                     
			
 
				+                     u(m,ii,jj,kk,c) = Pxi + Peta + Pzeta - 
			
 
				+                          Pxi*Peta - Pxi*Pzeta - Peta*Pzeta + 
			
 
				+                          Pxi*Peta*Pzeta;
			
 
				+
			
 
				+                  }
			
 
				+                  ii = ii + 1;
			
 
				+               }
			
 
				+               jj = jj + 1;
			
 
				+            }
			
 
				+            kk = kk+1;
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     now store the exact values on the boundaries        
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     west face                                                  
			
 
				+//---------------------------------------------------------------------
			
 
				+      c = slice(1,1);
			
 
				+      ii = 0;
			
 
				+      xi = 0.0e0;
			
 
				+      kk = 0;
			
 
				+      for (k = cell_low(3,c); k <= cell_high(3,c); k++) {
			
 
				+         zeta = (double)(k) * dnzm1;
			
 
				+         jj = 0;
			
 
				+         for (j = cell_low(2,c); j <= cell_high(2,c); j++) {
			
 
				+            eta = (double)(j) * dnym1;
			
 
				+            exact_solution(xi, eta, zeta, temp);
			
 
				+            for (m = 1; m <= 5; m++) {
			
 
				+               u(m,ii,jj,kk,c) = temp(m);
			
 
				+            }
			
 
				+            jj = jj + 1;
			
 
				+         }
			
 
				+         kk = kk + 1;
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     east face                                                      
			
 
				+//---------------------------------------------------------------------
			
 
				+      c  = slice(1,ncells);
			
 
				+      ii = cell_size(1,c)-1;
			
 
				+      xi = 1.0e0;
			
 
				+      kk = 0;
			
 
				+      for (k = cell_low(3,c); k <= cell_high(3,c); k++) {
			
 
				+         zeta = (double)(k) * dnzm1;
			
 
				+         jj = 0;
			
 
				+         for (j = cell_low(2,c); j <= cell_high(2,c); j++) {
			
 
				+            eta = (double)(j) * dnym1;
			
 
				+            exact_solution(xi, eta, zeta, temp);
			
 
				+            for (m = 1; m <= 5; m++) {
			
 
				+               u(m,ii,jj,kk,c) = temp(m);
			
 
				+            }
			
 
				+            jj = jj + 1;
			
 
				+         }
			
 
				+         kk = kk + 1;
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     south face                                                 
			
 
				+//---------------------------------------------------------------------
			
 
				+      c = slice(2,1);
			
 
				+      jj = 0;
			
 
				+      eta = 0.0e0;
			
 
				+      kk = 0;
			
 
				+      for (k = cell_low(3,c); k <= cell_high(3,c); k++) {
			
 
				+         zeta = (double)(k) * dnzm1;
			
 
				+         ii = 0;
			
 
				+         for (i = cell_low(1,c); i <= cell_high(1,c); i++) {
			
 
				+            xi = (double)(i) * dnxm1;
			
 
				+            exact_solution(xi, eta, zeta, temp);
			
 
				+            for (m = 1; m <= 5; m++) {
			
 
				+               u(m,ii,jj,kk,c) = temp(m);
			
 
				+            }
			
 
				+            ii = ii + 1;
			
 
				+         }
			
 
				+         kk = kk + 1;
			
 
				+      }
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     north face                                    
			
 
				+//---------------------------------------------------------------------
			
 
				+      c = slice(2,ncells);
			
 
				+      jj = cell_size(2,c)-1;
			
 
				+      eta = 1.0e0;
			
 
				+      kk = 0;
			
 
				+      for (k = cell_low(3,c); k <= cell_high(3,c); k++) {
			
 
				+         zeta = (double)(k) * dnzm1;
			
 
				+         ii = 0;
			
 
				+         for (i = cell_low(1,c); i <= cell_high(1,c); i++) {
			
 
				+            xi = (double)(i) * dnxm1;
			
 
				+            exact_solution(xi, eta, zeta, temp);
			
 
				+            for (m = 1; m <= 5; m++) {
			
 
				+               u(m,ii,jj,kk,c) = temp(m);
			
 
				+            }
			
 
				+            ii = ii + 1;
			
 
				+         }
			
 
				+         kk = kk + 1;
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     bottom face                                       
			
 
				+//---------------------------------------------------------------------
			
 
				+      c = slice(3,1);
			
 
				+      kk = 0;
			
 
				+      zeta = 0.0e0;
			
 
				+      jj = 0;
			
 
				+      for (j = cell_low(2,c); j <= cell_high(2,c); j++) {
			
 
				+         eta = (double)(j) * dnym1;
			
 
				+         ii = 0;
			
 
				+         for (i = cell_low(1,c); i <= cell_high(1,c); i++) {
			
 
				+            xi = (double)(i) *dnxm1;
			
 
				+            exact_solution(xi, eta, zeta, temp);
			
 
				+            for (m = 1; m <= 5; m++) {
			
 
				+               u(m,ii,jj,kk,c) = temp(m);
			
 
				+            }
			
 
				+            ii = ii + 1;
			
 
				+         }
			
 
				+         jj = jj + 1;
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     top face     
			
 
				+//---------------------------------------------------------------------
			
 
				+      c = slice(3,ncells);
			
 
				+      kk = cell_size(3,c)-1;
			
 
				+      zeta = 1.0e0;
			
 
				+      jj = 0;
			
 
				+      for (j = cell_low(2,c); j <= cell_high(2,c); j++) {
			
 
				+         eta = (double)(j) * dnym1;
			
 
				+         ii = 0;
			
 
				+         for (i = cell_low(1,c); i <= cell_high(1,c); i++) {
			
 
				+            xi = (double)(i) * dnxm1;
			
 
				+            exact_solution(xi, eta, zeta, temp);
			
 
				+            for (m = 1; m <= 5; m++) {
			
 
				+               u(m,ii,jj,kk,c) = temp(m);
			
 
				+            }
			
 
				+            ii = ii + 1;
			
 
				+         }
			
 
				+         jj = jj + 1;
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+void lhsinit() {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+      
			
 
				+      int i, j, k, d, c, m, n;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     loop over all cells                                       
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (c = 1; c <= ncells; c++) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     first, initialize the start and end arrays
			
 
				+//---------------------------------------------------------------------
			
 
				+         for (d = 1; d <= 3; d++) {
			
 
				+            if (cell_coord(d,c) == 1) {
			
 
				+               start(d,c) = 1;
			
 
				+            } else {
			
 
				+               start(d,c) = 0;
			
 
				+            }
			
 
				+            if (cell_coord(d,c) == ncells) {
			
 
				+               end(d,c) = 1;
			
 
				+            } else {
			
 
				+               end(d,c) = 0;
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     zero the whole left hand side for starters
			
 
				+//---------------------------------------------------------------------
			
 
				+         for (k = 0; k <= cell_size(3,c)-1; k++) {
			
 
				+            for (j = 0; j <= cell_size(2,c)-1; j++) {
			
 
				+               for (i = 0; i <= cell_size(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     for (n = 1; n <= 5; n++) {
			
 
				+                        lhsc(m,n,i,j,k,c) = 0.0e0;
			
 
				+                     }
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				//---------------------------------------------------------------------
				//     lhsabinit: reset the 5x5 blocks 0..size of lhsa and lhsb.
				//
				//       lhsa, lhsb   flat arrays of 5x5 blocks, 25 doubles per block;
				//                    block number i starts at element 25*(i+1), so the
				//                    first 25 elements (block -1) are left untouched
				//       size         number of the last block to reset; nothing is
				//                    written when size is negative
				//
				//     Every block of lhsa becomes all zero and every block of lhsb
				//     becomes the identity.  Setting all diagonal values to 1 is
				//     overkill, but convenient.
				//---------------------------------------------------------------------
				void lhsabinit(double lhsa[], double lhsb[], int size) {
				    int blk, row, col;

				    for (blk = 0; blk <= size; blk++) {
				        double *a = lhsa + 25*(blk+1);
				        double *b = lhsb + 25*(blk+1);

				        for (row = 0; row < 5; row++) {
				            for (col = 0; col < 5; col++) {
				                a[row + 5*col] = 0.0e0;
				                b[row + 5*col] = 0.0e0;
				            }
				            b[row + 5*row] = 1.0e0;
				        }
				    }
				}
			
 
				+
			
 
				+
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/inputbt.data.sample.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/inputbt.data.sample.svn-base
@@ -0,0 +1,5 @@
 
				+200       number of time steps
			
 
				+0.0008d0  dt for class A = 0.0008d0. class B = 0.0003d0  class C = 0.0001d0
			
 
				+64 64 64
			
 
				+5 0        write interval (optional read interval) for BTIO
			
 
				+0 1000000  number of nodes in collective buffering and buffer size for BTIO
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/make_set.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/make_set.c.svn-base
@@ -0,0 +1,222 @@
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include <stdio.h>
			
 
				+#include <stdlib.h>
			
 
				+#include <math.h>
			
 
				+#include "header.h"
			
 
				+#include "mpinpb.h"
			
 
				+
			
 
				+#define mod(p,q) ((p)%(q))
			
 
				+#define max(x,y)      ((x)>(y)? (x) : (y))
			
 
				+#define min(x,y)      ((x)<(y)? (x) : (y))
			
 
				+
			
 
				+void make_set() {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     This function allocates space for a set of cells and fills the set
			
 
				+//     such that communication between cells on different nodes is only
			
 
				+//     nearest neighbor
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+
			
 
				+      int p, i, j, c, dir, size, excess, ierr,ierrcode;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     compute square root; add small number to allow for roundoff
			
 
				+//     (note: this is computed in setup_mpi.f also, but prefer to do
			
 
				+//     it twice because of some include file problems).
			
 
				+//---------------------------------------------------------------------
			
 
				+      ncells = (int)(sqrt((double)(no_nodes) + 0.00001e0));
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     this makes coding easier
			
 
				+//---------------------------------------------------------------------
			
 
				+      p = ncells;
			
 
				+      
			
 
				+//---------------------------------------------------------------------
			
 
				+//     determine the location of the cell at the bottom of the 3D 
			
 
				+//     array of cells
			
 
				+//---------------------------------------------------------------------
			
 
				+      cell_coord(1,1) = mod(node,p) ;
			
 
				+      cell_coord(2,1) = node/p ;
			
 
				+      cell_coord(3,1) = 0;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     set the cell_coords for cells in the rest of the z-layers; 
			
 
				+//     this comes down to a simple linear numbering in the z-direct-
			
 
				+//     ion, and to the doubly-cyclic numbering in the other dirs     
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (c = 2; c <= p; c++) {
			
 
				+         cell_coord(1,c) = mod(cell_coord(1,c-1)+1,p) ;
			
 
				+         cell_coord(2,c) = mod(cell_coord(2,c-1)-1+p,p) ;
			
 
				+         cell_coord(3,c) = c-1;
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     offset all the coordinates by 1 to adjust for Fortran arrays
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (dir = 1; dir <= 3; dir++) {
			
 
				+         for (c = 1; c <= p; c++) {
			
 
				+            cell_coord(dir,c) = cell_coord(dir,c) + 1;
			
 
				+         }
			
 
				+      }
			
 
				+      
			
 
				+//---------------------------------------------------------------------
			
 
				+//     slice(dir,n) contains the sequence number of the cell that is in
			
 
				+//     coordinate plane n in the dir direction
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (dir = 1; dir <= 3; dir++) {
			
 
				+         for (c = 1; c <= p; c++) {
			
 
				+            slice(dir,cell_coord(dir,c)) = c;
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     fill the predecessor and successor entries, using the indices 
			
 
				+//     of the bottom cells (they are the same at each level of k 
			
 
				+//     anyway) acting as if full periodicity pertains; note that p is
			
 
				+//     added to those arguments to the mod functions that might
			
 
				+//     otherwise return wrong values when using the modulo function
			
 
				+//---------------------------------------------------------------------
			
 
				+      i = cell_coord(1,1)-1;
			
 
				+      j = cell_coord(2,1)-1;
			
 
				+
			
 
				+      predecessor(1) = mod(i-1+p,p) + p*j;
			
 
				+      predecessor(2) = i + p*mod(j-1+p,p);
			
 
				+      predecessor(3) = mod(i+1,p) + p*mod(j-1+p,p);
			
 
				+      successor(1)   = mod(i+1,p) + p*j;
			
 
				+      successor(2)   = i + p*mod(j+1,p);
			
 
				+      successor(3)   = mod(i-1+p,p) + p*mod(j+1,p);
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     now compute the sizes of the cells                                
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (dir = 1; dir <= 3; dir++) {
			
 
				+//---------------------------------------------------------------------
			
 
				+//     set cell_coord range for each direction                           
			
 
				+//---------------------------------------------------------------------
			
 
				+         size   = grid_points(dir)/p;
			
 
				+         excess = mod(grid_points(dir),p);
			
 
				+         for (c = 1; c <= ncells; c++) {
			
 
				+            if (cell_coord(dir,c) <= excess) {
			
 
				+               cell_size(dir,c) = size+1;
			
 
				+               cell_low(dir,c) = (cell_coord(dir,c)-1)*(size+1);
			
 
				+               cell_high(dir,c) = cell_low(dir,c)+size;
			
 
				+            } else {
			
 
				+               cell_size(dir,c) = size;
			
 
				+               cell_low(dir,c)  = excess*(size+1)+
			
 
				+                    (cell_coord(dir,c)-excess-1)*size;
			
 
				+               cell_high(dir,c) = cell_low(dir,c)+size-1;
			
 
				+            }
			
 
				+            if (cell_size(dir, c) <= 2) {
			
 
				+               printf(" Error: Cell size too small. Min size is 3\n");
			
 
				+               ierrcode = 1;
			
 
				+               exit(1);
			
 
				+            }
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+
			
 
				+void make_color() {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     This function determines cycles in the communication graphs in
			
 
				+//     the six coordinate directions, and colors the ranks so they know
			
 
				+//     how to construct deadlock-free blocking communication schedules
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int p, i, j, dir, node_loc, comm_color, node_min, length, start_found;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     compute square root; add small number to allow for roundoff
			
 
				+//     (note: this is computed in setup_mpi.f also, but prefer to do
			
 
				+//     it twice because of some include file problems).
			
 
				+//---------------------------------------------------------------------
			
 
				+      ncells = (int)(sqrt((double)(no_nodes) + 0.00001e0));
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     this makes coding easier
			
 
				+//---------------------------------------------------------------------
			
 
				+      p = ncells;
			
 
				+
			
 
				+      for (dir = 0; dir<6; dir++) {
			
 
				+
			
 
				+        node_loc = node_min = node; length = 1; start_found = 0;
			
 
				+        while (!start_found) {
			
 
				+          i = mod(node_loc,p) ;
			
 
				+          j = node_loc/p ;
			
 
				+
			
 
				+          switch (dir) {
			
 
				+            case (WESTDIR):   node_loc = mod(i-1+p,p) + p*j;          break;
			
 
				+            case (EASTDIR):   node_loc = mod(i+1,p) + p*j;            break;
			
 
				+            case (SOUTHDIR):  node_loc = i + p*mod(j-1+p,p);          break;
			
 
				+            case (NORTHDIR):  node_loc = i + p*mod(j+1,p);            break;
			
 
				+            case (BOTTOMDIR): node_loc = mod(i+1,p) + p*mod(j-1+p,p); break;
			
 
				+            case (TOPDIR):    node_loc = mod(i-1+p,p) + p*mod(j+1,p); break;
			
 
				+          }
			
 
				+
			
 
				+          // the next block ensures that the node with the lowest rank
			
 
				+          // in this cycle is colored WHITE (=0), and that nodes an even
			
 
				+          // number of jumps removed from that lowest-ranked member
			
 
				+          // are also white. The others are RED (1).
			
 
				+          if (node_loc <= node_min) {
			
 
				+            node_min = node_loc;
			
 
				+            comm_color = 0;
			
 
				+          } else comm_color = !comm_color;
			
 
				+          if (node_loc == node) start_found = 1;
			
 
				+          else length++;
			
 
				+        }
			
 
				+        send_color[dir] = comm_color;
			
 
				+        recv_color[dir] = !send_color[dir];
			
 
				+        // if the number of nodes in this cycle is odd, we need to treat the 
			
 
				+        // last node before the "start" of the cycle differently
			
 
				+        if (length%2) {
			
 
				+          if (node == node_min) recv_color[dir] = 2;
			
 
				+          i = mod(node,p) ;
			
 
				+          j = node/p ;
			
 
				+          switch (dir) {
			
 
				+            case (WESTDIR):   node_loc = mod(i-1+p,p) + p*j;          break;
			
 
				+            case (EASTDIR):   node_loc = mod(i+1,p) + p*j;            break;
			
 
				+            case (SOUTHDIR):  node_loc = i + p*mod(j-1+p,p);          break;
			
 
				+            case (NORTHDIR):  node_loc = i + p*mod(j+1,p);            break;
			
 
				+            case (BOTTOMDIR): node_loc = mod(i+1,p) + p*mod(j-1+p,p); break;
			
 
				+            case (TOPDIR):    node_loc = mod(i-1+p,p) + p*mod(j+1,p); break;
			
 
				+          }      
			
 
				+          if (node_loc == node_min) send_color[dir] = 2;
			
 
				+        }
			
 
				+      }
			
 
				+     return;
			
 
				+}
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/mpinpb.h.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/mpinpb.h.svn-base
@@ -0,0 +1,34 @@
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				// Include guard. The name avoids a leading double underscore because
				// such identifiers are reserved for the implementation.
				#ifndef MPINPB_H
				#define MPINPB_H

				// Process-layout globals shared by the benchmark sources.
				// A translation unit that defines G_MAIN before including this header
				// gets the definitions; every other includer gets extern declarations.
				#ifdef G_MAIN
				       int           node, no_nodes, total_nodes, root;
				       int           active;
				#else
				extern int           node, no_nodes, total_nodes, root;
				extern int           active;

				#endif
				// When compiled with OpenMP each thread gets its own copy of the globals.
				#ifdef _OPENMP
				#pragma omp threadprivate (node, no_nodes, total_nodes, root, active)
				#endif
				#endif
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/print_results.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/print_results.c.svn-base
@@ -0,0 +1,104 @@
 
				+/*****************************************************************/
			
 
				+/******     C  _  P  R  I  N  T  _  R  E  S  U  L  T  S     ******/
			
 
				+/*****************************************************************/
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+#include <stdlib.h>
			
 
				+#include <stdio.h>
			
 
				#define class _class_

				/*
				 * print_results - write the end-of-run benchmark summary to stdout.
				 *
				 *   name                 benchmark name shown in the banner line
				 *   class                problem class, printed as a single character
				 *   n1, n2, n3           problem size, printed as "n1x n2x n3"
				 *   niter                iteration count
				 *   nprocs_compiled      printed only when it is non-zero
				 *   nprocs_total         process count; also the divisor for Mop/s/process
				 *   t                    time in seconds
				 *   mops                 total Mop/s
				 *   optype               operation type label
				 *   passed_verification  non-zero prints SUCCESSFUL, zero UNSUCCESSFUL
				 *   npbversion, compiletime, mpicc, clink, cmpi_lib, cmpi_inc,
				 *   cflags, clinkflags   build information, echoed verbatim
				 *
				 * All string arguments are passed to printf("%s") and must be non-NULL.
				 * When built with SMP defined, the value of the MP_SET_NUMTHREADS
				 * environment variable is printed as well.
				 */
				void print_results( char   *name,
				                      char   class,
				                      int    n1,
				                      int    n2,
				                      int    n3,
				                      int    niter,
				                      int    nprocs_compiled,
				                      int    nprocs_total,
				                      double t,
				                      double mops,
				                      char   *optype,
				                      int    passed_verification,
				                      char   *npbversion,
				                      char   *compiletime,
				                      char   *mpicc,
				                      char   *clink,
				                      char   *cmpi_lib,
				                      char   *cmpi_inc,
				                      char   *cflags,
				                      char   *clinkflags )
				{
				    printf( "\n\n %s Benchmark Completed\n", name );

				    printf( " Class           =                        %c\n", class );

				    printf( " Size            =            %3dx %3dx %3d\n", n1,n2,n3 );

				    printf( " Iterations      =             %12d\n", niter );

				    printf( " Time in seconds =             %12.2f\n", t );

				    printf( " Total processes =             %12d\n", nprocs_total );

				    if ( nprocs_compiled != 0 )
				        printf( " Compiled procs  =             %12d\n", nprocs_compiled );

				    printf( " Mop/s total     =             %12.2f\n", mops );

				    printf( " Mop/s/process   =             %12.2f\n", mops/((float) nprocs_total) );

				    printf( " Operation type  = %24s\n", optype);

				    if( passed_verification )
				        printf( " Verification    =               SUCCESSFUL\n" );
				    else
				        printf( " Verification    =             UNSUCCESSFUL\n" );

				    printf( " Version         =             %12s\n", npbversion );

				    printf( " Compile date    =             %12s\n", compiletime );

				    printf( "\n Compile options:\n" );

				    printf( "    MPICC        = %s\n", mpicc );

				    printf( "    CLINK        = %s\n", clink );

				    printf( "    CMPI_LIB     = %s\n", cmpi_lib );

				    printf( "    CMPI_INC     = %s\n", cmpi_inc );

				    printf( "    CFLAGS       = %s\n", cflags );

				    printf( "    CLINKFLAGS   = %s\n", clinkflags );
				#ifdef SMP
				    {
				        /* getenv returns NULL when the variable is not set, and passing
				           NULL to %s is undefined; print the text glibc would have shown */
				        const char *evalue = getenv("MP_SET_NUMTHREADS");
				        printf( "   MULTICPUS = %s\n", evalue != NULL ? evalue : "(null)" );
				    }
				#endif

				    printf( "\n\n" );
				    printf( " Please send the results of this run to:\n\n" );
				    printf( " NPB Development Team\n" );
				    printf( " Internet: npb@nas.nasa.gov\n \n" );
				    printf( " If email is not available, send this to:\n\n" );
				    printf( " MS T27A-1\n" );
				    printf( " NASA Ames Research Center\n" );
				    printf( " Moffett Field, CA  94035-1000\n\n" );
				    printf( " Fax: 650-604-3957\n\n" );
				}
			
 
				+ 
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/rhs.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/rhs.c.svn-base
@@ -0,0 +1,439 @@
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include "header.h"
			
 
				+
			
 
				+void compute_rhs() {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int c, i, j, k, m;
			
 
				+      double rho_inv, uijk, up1, um1, vijk, vp1, vm1,
			
 
				+           wijk, wp1, wm1;
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     loop over all cells owned by this node                           
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (c = 1; c <= ncells; c++) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     compute the reciprocal of density, and the kinetic energy, 
			
 
				+//     and the speed of sound.
			
 
				+//---------------------------------------------------------------------
			
 
				+         for (k = -1; k <= cell_size(3,c); k++) {
			
 
				+            for (j = -1; j <= cell_size(2,c); j++) {
			
 
				+               for (i = -1; i <= cell_size(1,c); i++) {
			
 
				+                  rho_inv = 1.0e0/u(1,i,j,k,c);
			
 
				+                  rho_i(i,j,k,c) = rho_inv;
			
 
				+                  us(i,j,k,c) = u(2,i,j,k,c) * rho_inv;
			
 
				+                  vs(i,j,k,c) = u(3,i,j,k,c) * rho_inv;
			
 
				+                  ws(i,j,k,c) = u(4,i,j,k,c) * rho_inv;
			
 
				+                  square(i,j,k,c)     = 0.5e0* (
			
 
				+                       u(2,i,j,k,c)*u(2,i,j,k,c) + 
			
 
				+                       u(3,i,j,k,c)*u(3,i,j,k,c) +
			
 
				+                       u(4,i,j,k,c)*u(4,i,j,k,c) ) * rho_inv;
			
 
				+                  qs(i,j,k,c) = square(i,j,k,c) * rho_inv;
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+// copy the exact forcing term to the right hand side;  because 
			
 
				+// this forcing term is known, we can store it on the whole of every 
			
 
				+// cell,  including the boundary                   
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+         for (k = 0; k <= cell_size(3,c)-1; k++) {
			
 
				+            for (j = 0; j <= cell_size(2,c)-1; j++) {
			
 
				+               for (i = 0; i <= cell_size(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     rhs(m,i,j,k,c) = forcing(m,i,j,k,c);
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     compute xi-direction fluxes 
			
 
				+//---------------------------------------------------------------------
			
 
				+         for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {
			
 
				+            for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+                  uijk = us(i,j,k,c);
			
 
				+                  up1  = us(i+1,j,k,c);
			
 
				+                  um1  = us(i-1,j,k,c);
			
 
				+
			
 
				+                  rhs(1,i,j,k,c) = rhs(1,i,j,k,c) + dx1tx1 * 
			
 
				+                       (u(1,i+1,j,k,c) - 2.0e0*u(1,i,j,k,c) + 
			
 
				+                       u(1,i-1,j,k,c)) -
			
 
				+                       tx2 * (u(2,i+1,j,k,c) - u(2,i-1,j,k,c));
			
 
				+
			
 
				+                  rhs(2,i,j,k,c) = rhs(2,i,j,k,c) + dx2tx1 * 
			
 
				+                       (u(2,i+1,j,k,c) - 2.0e0*u(2,i,j,k,c) + 
			
 
				+                       u(2,i-1,j,k,c)) +
			
 
				+                       xxcon2*con43 * (up1 - 2.0e0*uijk + um1) -
			
 
				+                       tx2 * (u(2,i+1,j,k,c)*up1 - 
			
 
				+                       u(2,i-1,j,k,c)*um1 +
			
 
				+                       (u(5,i+1,j,k,c)- square(i+1,j,k,c)-
			
 
				+                       u(5,i-1,j,k,c)+ square(i-1,j,k,c))*
			
 
				+                       c2);
			
 
				+
			
 
				+                  rhs(3,i,j,k,c) = rhs(3,i,j,k,c) + dx3tx1 * 
			
 
				+                       (u(3,i+1,j,k,c) - 2.0e0*u(3,i,j,k,c) +
			
 
				+                       u(3,i-1,j,k,c)) +
			
 
				+                       xxcon2 * (vs(i+1,j,k,c) - 2.0e0*vs(i,j,k,c) +
			
 
				+                       vs(i-1,j,k,c)) -
			
 
				+                       tx2 * (u(3,i+1,j,k,c)*up1 - 
			
 
				+                       u(3,i-1,j,k,c)*um1);
			
 
				+
			
 
				+                  rhs(4,i,j,k,c) = rhs(4,i,j,k,c) + dx4tx1 * 
			
 
				+                       (u(4,i+1,j,k,c) - 2.0e0*u(4,i,j,k,c) +
			
 
				+                       u(4,i-1,j,k,c)) +
			
 
				+                       xxcon2 * (ws(i+1,j,k,c) - 2.0e0*ws(i,j,k,c) +
			
 
				+                       ws(i-1,j,k,c)) -
			
 
				+                       tx2 * (u(4,i+1,j,k,c)*up1 - 
			
 
				+                       u(4,i-1,j,k,c)*um1);
			
 
				+
			
 
				+                  rhs(5,i,j,k,c) = rhs(5,i,j,k,c) + dx5tx1 * 
			
 
				+                       (u(5,i+1,j,k,c) - 2.0e0*u(5,i,j,k,c) +
			
 
				+                       u(5,i-1,j,k,c)) +
			
 
				+                       xxcon3 * (qs(i+1,j,k,c) - 2.0e0*qs(i,j,k,c) +
			
 
				+                       qs(i-1,j,k,c)) +
			
 
				+                       xxcon4 * (up1*up1 -       2.0e0*uijk*uijk + 
			
 
				+                       um1*um1) +
			
 
				+                       xxcon5 * (u(5,i+1,j,k,c)*rho_i(i+1,j,k,c) - 
			
 
				+                       2.0e0*u(5,i,j,k,c)*rho_i(i,j,k,c) +
			
 
				+                       u(5,i-1,j,k,c)*rho_i(i-1,j,k,c)) -
			
 
				+                       tx2 * ( (c1*u(5,i+1,j,k,c) - 
			
 
				+                       c2*square(i+1,j,k,c))*up1 -
			
 
				+                       (c1*u(5,i-1,j,k,c) - 
			
 
				+                       c2*square(i-1,j,k,c))*um1 );
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     add fourth order xi-direction dissipation               
			
 
				+//---------------------------------------------------------------------
			
 
				+         if (start(1,c) > 0) {
			
 
				+            for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {
			
 
				+               for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
			
 
				+                  i = 1;
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c)- dssp * 
			
 
				+                          ( 5.0e0*u(m,i,j,k,c) - 4.0e0*u(m,i+1,j,k,c) +
			
 
				+                          u(m,i+2,j,k,c));
			
 
				+                  }
			
 
				+
			
 
				+                  i = 2;
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp * 
			
 
				+                          (-4.0e0*u(m,i-1,j,k,c) + 6.0e0*u(m,i,j,k,c) -
			
 
				+                          4.0e0*u(m,i+1,j,k,c) + u(m,i+2,j,k,c));
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+         for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {
			
 
				+            for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
			
 
				+               for (i = 3*start(1,c); i <= cell_size(1,c)-3*end(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp * 
			
 
				+                          (  u(m,i-2,j,k,c) - 4.0e0*u(m,i-1,j,k,c) + 
			
 
				+                          6.0*u(m,i,j,k,c) - 4.0e0*u(m,i+1,j,k,c) + 
			
 
				+                          u(m,i+2,j,k,c) );
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+         
			
 
				+
			
 
				+         if (end(1,c) > 0) {
			
 
				+            for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {
			
 
				+               for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
			
 
				+                  i = cell_size(1,c)-3;
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp *
			
 
				+                          ( u(m,i-2,j,k,c) - 4.0e0*u(m,i-1,j,k,c) + 
			
 
				+                          6.0e0*u(m,i,j,k,c) - 4.0e0*u(m,i+1,j,k,c) );
			
 
				+                  }
			
 
				+
			
 
				+                  i = cell_size(1,c)-2;
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp *
			
 
				+                          ( u(m,i-2,j,k,c) - 4.e0*u(m,i-1,j,k,c) +
			
 
				+                          5.e0*u(m,i,j,k,c) );
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     compute eta-direction fluxes 
			
 
				+//---------------------------------------------------------------------
			
 
				+         for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {
			
 
				+            for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+                  vijk = vs(i,j,k,c);
			
 
				+                  vp1  = vs(i,j+1,k,c);
			
 
				+                  vm1  = vs(i,j-1,k,c);
			
 
				+                  rhs(1,i,j,k,c) = rhs(1,i,j,k,c) + dy1ty1 * 
			
 
				+                       (u(1,i,j+1,k,c) - 2.0e0*u(1,i,j,k,c) + 
			
 
				+                       u(1,i,j-1,k,c)) -
			
 
				+                       ty2 * (u(3,i,j+1,k,c) - u(3,i,j-1,k,c));
			
 
				+                  rhs(2,i,j,k,c) = rhs(2,i,j,k,c) + dy2ty1 * 
			
 
				+                       (u(2,i,j+1,k,c) - 2.0e0*u(2,i,j,k,c) + 
			
 
				+                       u(2,i,j-1,k,c)) +
			
 
				+                       yycon2 * (us(i,j+1,k,c) - 2.0e0*us(i,j,k,c) + 
			
 
				+                       us(i,j-1,k,c)) -
			
 
				+                       ty2 * (u(2,i,j+1,k,c)*vp1 - 
			
 
				+                       u(2,i,j-1,k,c)*vm1);
			
 
				+                  rhs(3,i,j,k,c) = rhs(3,i,j,k,c) + dy3ty1 * 
			
 
				+                       (u(3,i,j+1,k,c) - 2.0e0*u(3,i,j,k,c) + 
			
 
				+                       u(3,i,j-1,k,c)) +
			
 
				+                       yycon2*con43 * (vp1 - 2.0e0*vijk + vm1) -
			
 
				+                       ty2 * (u(3,i,j+1,k,c)*vp1 - 
			
 
				+                       u(3,i,j-1,k,c)*vm1 +
			
 
				+                       (u(5,i,j+1,k,c) - square(i,j+1,k,c) - 
			
 
				+                       u(5,i,j-1,k,c) + square(i,j-1,k,c))
			
 
				+                       *c2);
			
 
				+                  rhs(4,i,j,k,c) = rhs(4,i,j,k,c) + dy4ty1 * 
			
 
				+                       (u(4,i,j+1,k,c) - 2.0e0*u(4,i,j,k,c) + 
			
 
				+                       u(4,i,j-1,k,c)) +
			
 
				+                       yycon2 * (ws(i,j+1,k,c) - 2.0e0*ws(i,j,k,c) + 
			
 
				+                       ws(i,j-1,k,c)) -
			
 
				+                       ty2 * (u(4,i,j+1,k,c)*vp1 - 
			
 
				+                       u(4,i,j-1,k,c)*vm1);
			
 
				+                  rhs(5,i,j,k,c) = rhs(5,i,j,k,c) + dy5ty1 * 
			
 
				+                       (u(5,i,j+1,k,c) - 2.0e0*u(5,i,j,k,c) + 
			
 
				+                       u(5,i,j-1,k,c)) +
			
 
				+                       yycon3 * (qs(i,j+1,k,c) - 2.0e0*qs(i,j,k,c) + 
			
 
				+                       qs(i,j-1,k,c)) +
			
 
				+                       yycon4 * (vp1*vp1       - 2.0e0*vijk*vijk + 
			
 
				+                       vm1*vm1) +
			
 
				+                       yycon5 * (u(5,i,j+1,k,c)*rho_i(i,j+1,k,c) - 
			
 
				+                       2.0e0*u(5,i,j,k,c)*rho_i(i,j,k,c) +
			
 
				+                       u(5,i,j-1,k,c)*rho_i(i,j-1,k,c)) -
			
 
				+                       ty2 * ((c1*u(5,i,j+1,k,c) - 
			
 
				+                       c2*square(i,j+1,k,c)) * vp1 -
			
 
				+                       (c1*u(5,i,j-1,k,c) - 
			
 
				+                       c2*square(i,j-1,k,c)) * vm1);
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     add fourth order eta-direction dissipation         
			
 
				+//---------------------------------------------------------------------
			
 
				+         if (start(2,c) > 0) {
			
 
				+            for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {
			
 
				+               j = 1;
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c)- dssp * 
			
 
				+                          ( 5.0e0*u(m,i,j,k,c) - 4.0e0*u(m,i,j+1,k,c) +
			
 
				+                          u(m,i,j+2,k,c));
			
 
				+                  }
			
 
				+               }
			
 
				+
			
 
				+               j = 2;
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp * 
			
 
				+                          (-4.0e0*u(m,i,j-1,k,c) + 6.0e0*u(m,i,j,k,c) -
			
 
				+                          4.0e0*u(m,i,j+1,k,c) + u(m,i,j+2,k,c));
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+         for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {
			
 
				+            for (j = 3*start(2,c); j <= cell_size(2,c)-3*end(2,c)-1; j++) {
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp * 
			
 
				+                          (  u(m,i,j-2,k,c) - 4.0e0*u(m,i,j-1,k,c) + 
			
 
				+                          6.0*u(m,i,j,k,c) - 4.0e0*u(m,i,j+1,k,c) + 
			
 
				+                          u(m,i,j+2,k,c) );
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+         
			
 
				+         if (end(2,c) > 0) {
			
 
				+            for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {
			
 
				+               j = cell_size(2,c)-3;
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp *
			
 
				+                          ( u(m,i,j-2,k,c) - 4.0e0*u(m,i,j-1,k,c) + 
			
 
				+                          6.0e0*u(m,i,j,k,c) - 4.0e0*u(m,i,j+1,k,c) );
			
 
				+                  }
			
 
				+               }
			
 
				+
			
 
				+               j = cell_size(2,c)-2;
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp *
			
 
				+                          ( u(m,i,j-2,k,c) - 4.e0*u(m,i,j-1,k,c) +
			
 
				+                          5.e0*u(m,i,j,k,c) );
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     compute zeta-direction fluxes 
			
 
				+//---------------------------------------------------------------------
			
 
				+         for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {
			
 
				+            for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+                  wijk = ws(i,j,k,c);
			
 
				+                  wp1  = ws(i,j,k+1,c);
			
 
				+                  wm1  = ws(i,j,k-1,c);
			
 
				+
			
 
				+                  rhs(1,i,j,k,c) = rhs(1,i,j,k,c) + dz1tz1 * 
			
 
				+                       (u(1,i,j,k+1,c) - 2.0e0*u(1,i,j,k,c) + 
			
 
				+                       u(1,i,j,k-1,c)) -
			
 
				+                       tz2 * (u(4,i,j,k+1,c) - u(4,i,j,k-1,c));
			
 
				+                  rhs(2,i,j,k,c) = rhs(2,i,j,k,c) + dz2tz1 * 
			
 
				+                       (u(2,i,j,k+1,c) - 2.0e0*u(2,i,j,k,c) + 
			
 
				+                       u(2,i,j,k-1,c)) +
			
 
				+                       zzcon2 * (us(i,j,k+1,c) - 2.0e0*us(i,j,k,c) + 
			
 
				+                       us(i,j,k-1,c)) -
			
 
				+                       tz2 * (u(2,i,j,k+1,c)*wp1 - 
			
 
				+                       u(2,i,j,k-1,c)*wm1);
			
 
				+                  rhs(3,i,j,k,c) = rhs(3,i,j,k,c) + dz3tz1 * 
			
 
				+                       (u(3,i,j,k+1,c) - 2.0e0*u(3,i,j,k,c) + 
			
 
				+                       u(3,i,j,k-1,c)) +
			
 
				+                       zzcon2 * (vs(i,j,k+1,c) - 2.0e0*vs(i,j,k,c) + 
			
 
				+                       vs(i,j,k-1,c)) -
			
 
				+                       tz2 * (u(3,i,j,k+1,c)*wp1 - 
			
 
				+                       u(3,i,j,k-1,c)*wm1);
			
 
				+                  rhs(4,i,j,k,c) = rhs(4,i,j,k,c) + dz4tz1 * 
			
 
				+                       (u(4,i,j,k+1,c) - 2.0e0*u(4,i,j,k,c) + 
			
 
				+                       u(4,i,j,k-1,c)) +
			
 
				+                       zzcon2*con43 * (wp1 - 2.0e0*wijk + wm1) -
			
 
				+                       tz2 * (u(4,i,j,k+1,c)*wp1 - 
			
 
				+                       u(4,i,j,k-1,c)*wm1 +
			
 
				+                       (u(5,i,j,k+1,c) - square(i,j,k+1,c) - 
			
 
				+                       u(5,i,j,k-1,c) + square(i,j,k-1,c))
			
 
				+                       *c2);
			
 
				+                  rhs(5,i,j,k,c) = rhs(5,i,j,k,c) + dz5tz1 * 
			
 
				+                       (u(5,i,j,k+1,c) - 2.0e0*u(5,i,j,k,c) + 
			
 
				+                       u(5,i,j,k-1,c)) +
			
 
				+                       zzcon3 * (qs(i,j,k+1,c) - 2.0e0*qs(i,j,k,c) + 
			
 
				+                       qs(i,j,k-1,c)) +
			
 
				+                       zzcon4 * (wp1*wp1 - 2.0e0*wijk*wijk + 
			
 
				+                       wm1*wm1) +
			
 
				+                       zzcon5 * (u(5,i,j,k+1,c)*rho_i(i,j,k+1,c) - 
			
 
				+                       2.0e0*u(5,i,j,k,c)*rho_i(i,j,k,c) +
			
 
				+                       u(5,i,j,k-1,c)*rho_i(i,j,k-1,c)) -
			
 
				+                       tz2 * ( (c1*u(5,i,j,k+1,c) - 
			
 
				+                       c2*square(i,j,k+1,c))*wp1 -
			
 
				+                       (c1*u(5,i,j,k-1,c) - 
			
 
				+                       c2*square(i,j,k-1,c))*wm1);
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     add fourth order zeta-direction dissipation                
			
 
				+//---------------------------------------------------------------------
			
 
				+         if (start(3,c) > 0) {
			
 
				+            k = 1;
			
 
				+            for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c)- dssp * 
			
 
				+                          ( 5.0e0*u(m,i,j,k,c) - 4.0e0*u(m,i,j,k+1,c) +
			
 
				+                          u(m,i,j,k+2,c));
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+
			
 
				+            k = 2;
			
 
				+            for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp * 
			
 
				+                          (-4.0e0*u(m,i,j,k-1,c) + 6.0e0*u(m,i,j,k,c) -
			
 
				+                          4.0e0*u(m,i,j,k+1,c) + u(m,i,j,k+2,c));
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+         for (k = 3*start(3,c); k <= cell_size(3,c)-3*end(3,c)-1; k++) {
			
 
				+            for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp * 
			
 
				+                          (  u(m,i,j,k-2,c) - 4.0e0*u(m,i,j,k-1,c) + 
			
 
				+                          6.0*u(m,i,j,k,c) - 4.0e0*u(m,i,j,k+1,c) + 
			
 
				+                          u(m,i,j,k+2,c) );
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+         
			
 
				+         if (end(3,c) > 0) {
			
 
				+            k = cell_size(3,c)-3;
			
 
				+            for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp *
			
 
				+                          ( u(m,i,j,k-2,c) - 4.0e0*u(m,i,j,k-1,c) + 
			
 
				+                          6.0e0*u(m,i,j,k,c) - 4.0e0*u(m,i,j,k+1,c) );
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+
			
 
				+            k = cell_size(3,c)-2;
			
 
				+            for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp *
			
 
				+                          ( u(m,i,j,k-2,c) - 4.e0*u(m,i,j,k-1,c) +
			
 
				+                          5.e0*u(m,i,j,k,c) );
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+         for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {
			
 
				+            for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) * dt;
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+      }
			
 
				+      
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/set_constants.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/set_constants.c.svn-base
@@ -0,0 +1,220 @@
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include <math.h>
			
 
				+#include "header.h"
			
 
				+
			
 
				+#define dmax1(x,y) ((x)>(y)? (x):(y))
			
 
				+
			
 
				+void  set_constants() {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      
			
 
				+      ce(1,1)  = 2.0e0;
			
 
				+      ce(1,2)  = 0.0e0;
			
 
				+      ce(1,3)  = 0.0e0;
			
 
				+      ce(1,4)  = 4.0e0;
			
 
				+      ce(1,5)  = 5.0e0;
			
 
				+      ce(1,6)  = 3.0e0;
			
 
				+      ce(1,7)  = 0.5e0;
			
 
				+      ce(1,8)  = 0.02e0;
			
 
				+      ce(1,9)  = 0.01e0;
			
 
				+      ce(1,10) = 0.03e0;
			
 
				+      ce(1,11) = 0.5e0;
			
 
				+      ce(1,12) = 0.4e0;
			
 
				+      ce(1,13) = 0.3e0;
			
 
				+      
			
 
				+      ce(2,1)  = 1.0e0;
			
 
				+      ce(2,2)  = 0.0e0;
			
 
				+      ce(2,3)  = 0.0e0;
			
 
				+      ce(2,4)  = 0.0e0;
			
 
				+      ce(2,5)  = 1.0e0;
			
 
				+      ce(2,6)  = 2.0e0;
			
 
				+      ce(2,7)  = 3.0e0;
			
 
				+      ce(2,8)  = 0.01e0;
			
 
				+      ce(2,9)  = 0.03e0;
			
 
				+      ce(2,10) = 0.02e0;
			
 
				+      ce(2,11) = 0.4e0;
			
 
				+      ce(2,12) = 0.3e0;
			
 
				+      ce(2,13) = 0.5e0;
			
 
				+
			
 
				+      ce(3,1)  = 2.0e0;
			
 
				+      ce(3,2)  = 2.0e0;
			
 
				+      ce(3,3)  = 0.0e0;
			
 
				+      ce(3,4)  = 0.0e0;
			
 
				+      ce(3,5)  = 0.0e0;
			
 
				+      ce(3,6)  = 2.0e0;
			
 
				+      ce(3,7)  = 3.0e0;
			
 
				+      ce(3,8)  = 0.04e0;
			
 
				+      ce(3,9)  = 0.03e0;
			
 
				+      ce(3,10) = 0.05e0;
			
 
				+      ce(3,11) = 0.3e0;
			
 
				+      ce(3,12) = 0.5e0;
			
 
				+      ce(3,13) = 0.4e0;
			
 
				+
			
 
				+      ce(4,1)  = 2.0e0;
			
 
				+      ce(4,2)  = 2.0e0;
			
 
				+      ce(4,3)  = 0.0e0;
			
 
				+      ce(4,4)  = 0.0e0;
			
 
				+      ce(4,5)  = 0.0e0;
			
 
				+      ce(4,6)  = 2.0e0;
			
 
				+      ce(4,7)  = 3.0e0;
			
 
				+      ce(4,8)  = 0.03e0;
			
 
				+      ce(4,9)  = 0.05e0;
			
 
				+      ce(4,10) = 0.04e0;
			
 
				+      ce(4,11) = 0.2e0;
			
 
				+      ce(4,12) = 0.1e0;
			
 
				+      ce(4,13) = 0.3e0;
			
 
				+
			
 
				+      ce(5,1)  = 5.0e0;
			
 
				+      ce(5,2)  = 4.0e0;
			
 
				+      ce(5,3)  = 3.0e0;
			
 
				+      ce(5,4)  = 2.0e0;
			
 
				+      ce(5,5)  = 0.1e0;
			
 
				+      ce(5,6)  = 0.4e0;
			
 
				+      ce(5,7)  = 0.3e0;
			
 
				+      ce(5,8)  = 0.05e0;
			
 
				+      ce(5,9)  = 0.04e0;
			
 
				+      ce(5,10) = 0.03e0;
			
 
				+      ce(5,11) = 0.1e0;
			
 
				+      ce(5,12) = 0.3e0;
			
 
				+      ce(5,13) = 0.2e0;
			
 
				+
			
 
				+      c1 = 1.4e0;
			
 
				+      c2 = 0.4e0;
			
 
				+      c3 = 0.1e0;
			
 
				+      c4 = 1.0e0;
			
 
				+      c5 = 1.4e0;
			
 
				+
			
 
				+      bt = sqrt(0.5e0);
			
 
				+
			
 
				+      dnxm1 = 1.0e0 / (double)(grid_points(1)-1);
			
 
				+      dnym1 = 1.0e0 / (double)(grid_points(2)-1);
			
 
				+      dnzm1 = 1.0e0 / (double)(grid_points(3)-1);
			
 
				+
			
 
				+      c1c2 = c1 * c2;
			
 
				+      c1c5 = c1 * c5;
			
 
				+      c3c4 = c3 * c4;
			
 
				+      c1345 = c1c5 * c3c4;
			
 
				+
			
 
				+      conz1 = (1.0e0-c1c5);
			
 
				+
			
 
				+      tx1 = 1.0e0 / (dnxm1 * dnxm1);
			
 
				+      tx2 = 1.0e0 / (2.0e0 * dnxm1);
			
 
				+      tx3 = 1.0e0 / dnxm1;
			
 
				+
			
 
				+      ty1 = 1.0e0 / (dnym1 * dnym1);
			
 
				+      ty2 = 1.0e0 / (2.0e0 * dnym1);
			
 
				+      ty3 = 1.0e0 / dnym1;
			
 
				+      
			
 
				+      tz1 = 1.0e0 / (dnzm1 * dnzm1);
			
 
				+      tz2 = 1.0e0 / (2.0e0 * dnzm1);
			
 
				+      tz3 = 1.0e0 / dnzm1;
			
 
				+
			
 
				+      dx1 = 0.75e0;
			
 
				+      dx2 = 0.75e0;
			
 
				+      dx3 = 0.75e0;
			
 
				+      dx4 = 0.75e0;
			
 
				+      dx5 = 0.75e0;
			
 
				+
			
 
				+      dy1 = 0.75e0;
			
 
				+      dy2 = 0.75e0;
			
 
				+      dy3 = 0.75e0;
			
 
				+      dy4 = 0.75e0;
			
 
				+      dy5 = 0.75e0;
			
 
				+
			
 
				+      dz1 = 1.0e0;
			
 
				+      dz2 = 1.0e0;
			
 
				+      dz3 = 1.0e0;
			
 
				+      dz4 = 1.0e0;
			
 
				+      dz5 = 1.0e0;
			
 
				+
			
 
				+      dxmax = dmax1(dx3, dx4);
			
 
				+      dymax = dmax1(dy2, dy4);
			
 
				+      dzmax = dmax1(dz2, dz3);
			
 
				+
			
 
				+      dssp = 0.25e0 * dmax1(dx1, dmax1(dy1, dz1) );
			
 
				+
			
 
				+      c4dssp = 4.0e0 * dssp;
			
 
				+      c5dssp = 5.0e0 * dssp;
			
 
				+
			
 
				+      dttx1 = dt*tx1;
			
 
				+      dttx2 = dt*tx2;
			
 
				+      dtty1 = dt*ty1;
			
 
				+      dtty2 = dt*ty2;
			
 
				+      dttz1 = dt*tz1;
			
 
				+      dttz2 = dt*tz2;
			
 
				+
			
 
				+      c2dttx1 = 2.0e0*dttx1;
			
 
				+      c2dtty1 = 2.0e0*dtty1;
			
 
				+      c2dttz1 = 2.0e0*dttz1;
			
 
				+
			
 
				+      dtdssp = dt*dssp;
			
 
				+
			
 
				+      comz1  = dtdssp;
			
 
				+      comz4  = 4.0e0*dtdssp;
			
 
				+      comz5  = 5.0e0*dtdssp;
			
 
				+      comz6  = 6.0e0*dtdssp;
			
 
				+
			
 
				+      c3c4tx3 = c3c4*tx3;
			
 
				+      c3c4ty3 = c3c4*ty3;
			
 
				+      c3c4tz3 = c3c4*tz3;
			
 
				+
			
 
				+      dx1tx1 = dx1*tx1;
			
 
				+      dx2tx1 = dx2*tx1;
			
 
				+      dx3tx1 = dx3*tx1;
			
 
				+      dx4tx1 = dx4*tx1;
			
 
				+      dx5tx1 = dx5*tx1;
			
 
				+      
			
 
				+      dy1ty1 = dy1*ty1;
			
 
				+      dy2ty1 = dy2*ty1;
			
 
				+      dy3ty1 = dy3*ty1;
			
 
				+      dy4ty1 = dy4*ty1;
			
 
				+      dy5ty1 = dy5*ty1;
			
 
				+      
			
 
				+      dz1tz1 = dz1*tz1;
			
 
				+      dz2tz1 = dz2*tz1;
			
 
				+      dz3tz1 = dz3*tz1;
			
 
				+      dz4tz1 = dz4*tz1;
			
 
				+      dz5tz1 = dz5*tz1;
			
 
				+
			
 
				+      c2iv  = 2.5e0;
			
 
				+      con43 = 4.0e0/3.0e0;
			
 
				+      con16 = 1.0e0/6.0e0;
			
 
				+      
			
 
				+      xxcon1 = c3c4tx3*con43*tx3;
			
 
				+      xxcon2 = c3c4tx3*tx3;
			
 
				+      xxcon3 = c3c4tx3*conz1*tx3;
			
 
				+      xxcon4 = c3c4tx3*con16*tx3;
			
 
				+      xxcon5 = c3c4tx3*c1c5*tx3;
			
 
				+
			
 
				+      yycon1 = c3c4ty3*con43*ty3;
			
 
				+      yycon2 = c3c4ty3*ty3;
			
 
				+      yycon3 = c3c4ty3*conz1*ty3;
			
 
				+      yycon4 = c3c4ty3*con16*ty3;
			
 
				+      yycon5 = c3c4ty3*c1c5*ty3;
			
 
				+
			
 
				+      zzcon1 = c3c4tz3*con43*tz3;
			
 
				+      zzcon2 = c3c4tz3*tz3;
			
 
				+      zzcon3 = c3c4tz3*conz1*tz3;
			
 
				+      zzcon4 = c3c4tz3*con16*tz3;
			
 
				+      zzcon5 = c3c4tz3*c1c5*tz3;
			
 
				+
			
 
				+      return;
			
 
				+}
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/setup_mpi.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/setup_mpi.c.svn-base
@@ -0,0 +1,60 @@
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include <math.h>
			
 
				+#include "mpinpb.h"
			
 
				+#include "npbparams.h"
			
 
				+#include "RCCE.h"
			
 
				+
			
 
				+int setup_mpi(int *argc, char **argv[]) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+// set up MPI stuff
			
 
				+//---------------------------------------------------------------------
			
 
				+      int error, color, nc;
			
 
				+
			
 
				+      if (error = RCCE_init(argc, argv)) return(error);
			
 
				+
			
 
				+      total_nodes = RCCE_num_ues();
			
 
				+      node = RCCE_ue();      
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     compute square root; add small number to allow for roundoff
			
 
				+//---------------------------------------------------------------------
			
 
				+      nc = (int)(sqrt((double)(total_nodes) + 0.00001e0));
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+// We handle a non-square number of nodes by making the excess nodes
			
 
				+// inactive. However, we can never handle more cells than were compiled
			
 
				+// in. 
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      if (nc > MAXCELLS) nc = MAXCELLS;
			
 
				+      no_nodes = nc*nc;      
			
 
				+      
			
 
				+//---------------------------------------------------------------------
			
 
				+//     let node 0 be the root for the group (there is only one)
			
 
				+//---------------------------------------------------------------------
			
 
				+      root = 0;
			
 
				+
			
 
				+      return(0);
			
 
				+}
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/solve_subs.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/solve_subs.c.svn-base
@@ -0,0 +1,647 @@
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#define ablock(m,n) ablock[(m-1)+5*(n-1)]
			
 
				+#define bblock(m,n) bblock[(m-1)+5*(n-1)]
			
 
				+#define cblock(m,n) cblock[(m-1)+5*(n-1)]
			
 
				+#define avec(m) avec[m-1]
			
 
				+#define bvec(m) bvec[m-1]
			
 
				+#define lhs(m,n) lhs[(m-1)+5*(n-1)]
			
 
				+#define c(m,n) c[(m-1)+5*(n-1)]
			
 
				+#define r(m) r[m-1]
			
 
				+
			
 
				+void matvec_sub(double ablock[],double avec[],double bvec[]) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     subtracts bvec=bvec - ablock*avec
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//            rhs(i,ic,jc,kc,ccell) = rhs(i,ic,jc,kc,ccell) 
			
 
				+//     $           - lhs(i,1,ablock,ia,ja,ka,acell)*
			
 
				+//---------------------------------------------------------------------
			
 
				+         bvec(1) = bvec(1) - ablock(1,1)*avec(1)
			
 
				+                           - ablock(1,2)*avec(2)
			
 
				+                           - ablock(1,3)*avec(3)
			
 
				+                           - ablock(1,4)*avec(4)
			
 
				+                           - ablock(1,5)*avec(5);
			
 
				+         bvec(2) = bvec(2) - ablock(2,1)*avec(1)
			
 
				+                           - ablock(2,2)*avec(2)
			
 
				+                           - ablock(2,3)*avec(3)
			
 
				+                           - ablock(2,4)*avec(4)
			
 
				+                           - ablock(2,5)*avec(5);
			
 
				+         bvec(3) = bvec(3) - ablock(3,1)*avec(1)
			
 
				+                           - ablock(3,2)*avec(2)
			
 
				+                           - ablock(3,3)*avec(3)
			
 
				+                           - ablock(3,4)*avec(4)
			
 
				+                           - ablock(3,5)*avec(5);
			
 
				+         bvec(4) = bvec(4) - ablock(4,1)*avec(1)
			
 
				+                           - ablock(4,2)*avec(2)
			
 
				+                           - ablock(4,3)*avec(3)
			
 
				+                           - ablock(4,4)*avec(4)
			
 
				+                           - ablock(4,5)*avec(5);
			
 
				+         bvec(5) = bvec(5) - ablock(5,1)*avec(1)
			
 
				+                           - ablock(5,2)*avec(2)
			
 
				+                           - ablock(5,3)*avec(3)
			
 
				+                           - ablock(5,4)*avec(4)
			
 
				+                           - ablock(5,5)*avec(5);
			
 
				+
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+void matmul_sub(double ablock[], double bblock[], double cblock[]) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     subtracts a(i,j,k) X b(i,j,k) from c(i,j,k)
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+
			
 
				+         cblock(1,1) = cblock(1,1) - ablock(1,1)*bblock(1,1)
			
 
				+                                   - ablock(1,2)*bblock(2,1)
			
 
				+                                   - ablock(1,3)*bblock(3,1)
			
 
				+                                   - ablock(1,4)*bblock(4,1)
			
 
				+                                   - ablock(1,5)*bblock(5,1);
			
 
				+         cblock(2,1) = cblock(2,1) - ablock(2,1)*bblock(1,1)
			
 
				+                                   - ablock(2,2)*bblock(2,1)
			
 
				+                                   - ablock(2,3)*bblock(3,1)
			
 
				+                                   - ablock(2,4)*bblock(4,1)
			
 
				+                                   - ablock(2,5)*bblock(5,1);
			
 
				+         cblock(3,1) = cblock(3,1) - ablock(3,1)*bblock(1,1)
			
 
				+                                   - ablock(3,2)*bblock(2,1)
			
 
				+                                   - ablock(3,3)*bblock(3,1)
			
 
				+                                   - ablock(3,4)*bblock(4,1)
			
 
				+                                   - ablock(3,5)*bblock(5,1);
			
 
				+         cblock(4,1) = cblock(4,1) - ablock(4,1)*bblock(1,1)
			
 
				+                                   - ablock(4,2)*bblock(2,1)
			
 
				+                                   - ablock(4,3)*bblock(3,1)
			
 
				+                                   - ablock(4,4)*bblock(4,1)
			
 
				+                                   - ablock(4,5)*bblock(5,1);
			
 
				+         cblock(5,1) = cblock(5,1) - ablock(5,1)*bblock(1,1)
			
 
				+                                   - ablock(5,2)*bblock(2,1)
			
 
				+                                   - ablock(5,3)*bblock(3,1)
			
 
				+                                   - ablock(5,4)*bblock(4,1)
			
 
				+                                   - ablock(5,5)*bblock(5,1);
			
 
				+         cblock(1,2) = cblock(1,2) - ablock(1,1)*bblock(1,2)
			
 
				+                                   - ablock(1,2)*bblock(2,2)
			
 
				+                                   - ablock(1,3)*bblock(3,2)
			
 
				+                                   - ablock(1,4)*bblock(4,2)
			
 
				+                                   - ablock(1,5)*bblock(5,2);
			
 
				+         cblock(2,2) = cblock(2,2) - ablock(2,1)*bblock(1,2)
			
 
				+                                   - ablock(2,2)*bblock(2,2)
			
 
				+                                   - ablock(2,3)*bblock(3,2)
			
 
				+                                   - ablock(2,4)*bblock(4,2)
			
 
				+                                   - ablock(2,5)*bblock(5,2);
			
 
				+         cblock(3,2) = cblock(3,2) - ablock(3,1)*bblock(1,2)
			
 
				+                                   - ablock(3,2)*bblock(2,2)
			
 
				+                                   - ablock(3,3)*bblock(3,2)
			
 
				+                                   - ablock(3,4)*bblock(4,2)
			
 
				+                                   - ablock(3,5)*bblock(5,2);
			
 
				+         cblock(4,2) = cblock(4,2) - ablock(4,1)*bblock(1,2)
			
 
				+                                   - ablock(4,2)*bblock(2,2)
			
 
				+                                   - ablock(4,3)*bblock(3,2)
			
 
				+                                   - ablock(4,4)*bblock(4,2)
			
 
				+                                   - ablock(4,5)*bblock(5,2);
			
 
				+         cblock(5,2) = cblock(5,2) - ablock(5,1)*bblock(1,2)
			
 
				+                                   - ablock(5,2)*bblock(2,2)
			
 
				+                                   - ablock(5,3)*bblock(3,2)
			
 
				+                                   - ablock(5,4)*bblock(4,2)
			
 
				+                                   - ablock(5,5)*bblock(5,2);
			
 
				+         cblock(1,3) = cblock(1,3) - ablock(1,1)*bblock(1,3)
			
 
				+                                   - ablock(1,2)*bblock(2,3)
			
 
				+                                   - ablock(1,3)*bblock(3,3)
			
 
				+                                   - ablock(1,4)*bblock(4,3)
			
 
				+                                   - ablock(1,5)*bblock(5,3);
			
 
				+         cblock(2,3) = cblock(2,3) - ablock(2,1)*bblock(1,3)
			
 
				+                                   - ablock(2,2)*bblock(2,3)
			
 
				+                                   - ablock(2,3)*bblock(3,3)
			
 
				+                                   - ablock(2,4)*bblock(4,3)
			
 
				+                                   - ablock(2,5)*bblock(5,3);
			
 
				+         cblock(3,3) = cblock(3,3) - ablock(3,1)*bblock(1,3)
			
 
				+                                   - ablock(3,2)*bblock(2,3)
			
 
				+                                   - ablock(3,3)*bblock(3,3)
			
 
				+                                   - ablock(3,4)*bblock(4,3)
			
 
				+                                   - ablock(3,5)*bblock(5,3);
			
 
				+         cblock(4,3) = cblock(4,3) - ablock(4,1)*bblock(1,3)
			
 
				+                                   - ablock(4,2)*bblock(2,3)
			
 
				+                                   - ablock(4,3)*bblock(3,3)
			
 
				+                                   - ablock(4,4)*bblock(4,3)
			
 
				+                                   - ablock(4,5)*bblock(5,3);
			
 
				+         cblock(5,3) = cblock(5,3) - ablock(5,1)*bblock(1,3)
			
 
				+                                   - ablock(5,2)*bblock(2,3)
			
 
				+                                   - ablock(5,3)*bblock(3,3)
			
 
				+                                   - ablock(5,4)*bblock(4,3)
			
 
				+                                   - ablock(5,5)*bblock(5,3);
			
 
				+         cblock(1,4) = cblock(1,4) - ablock(1,1)*bblock(1,4)
			
 
				+                                   - ablock(1,2)*bblock(2,4)
			
 
				+                                   - ablock(1,3)*bblock(3,4)
			
 
				+                                   - ablock(1,4)*bblock(4,4)
			
 
				+                                   - ablock(1,5)*bblock(5,4);
			
 
				+         cblock(2,4) = cblock(2,4) - ablock(2,1)*bblock(1,4)
			
 
				+                                   - ablock(2,2)*bblock(2,4)
			
 
				+                                   - ablock(2,3)*bblock(3,4)
			
 
				+                                   - ablock(2,4)*bblock(4,4)
			
 
				+                                   - ablock(2,5)*bblock(5,4);
			
 
				+         cblock(3,4) = cblock(3,4) - ablock(3,1)*bblock(1,4)
			
 
				+                                   - ablock(3,2)*bblock(2,4)
			
 
				+                                   - ablock(3,3)*bblock(3,4)
			
 
				+                                   - ablock(3,4)*bblock(4,4)
			
 
				+                                   - ablock(3,5)*bblock(5,4);
			
 
				+         cblock(4,4) = cblock(4,4) - ablock(4,1)*bblock(1,4)
			
 
				+                                   - ablock(4,2)*bblock(2,4)
			
 
				+                                   - ablock(4,3)*bblock(3,4)
			
 
				+                                   - ablock(4,4)*bblock(4,4)
			
 
				+                                   - ablock(4,5)*bblock(5,4);
			
 
				+         cblock(5,4) = cblock(5,4) - ablock(5,1)*bblock(1,4)
			
 
				+                                   - ablock(5,2)*bblock(2,4)
			
 
				+                                   - ablock(5,3)*bblock(3,4)
			
 
				+                                   - ablock(5,4)*bblock(4,4)
			
 
				+                                   - ablock(5,5)*bblock(5,4);
			
 
				+         cblock(1,5) = cblock(1,5) - ablock(1,1)*bblock(1,5)
			
 
				+                                   - ablock(1,2)*bblock(2,5)
			
 
				+                                   - ablock(1,3)*bblock(3,5)
			
 
				+                                   - ablock(1,4)*bblock(4,5)
			
 
				+                                   - ablock(1,5)*bblock(5,5);
			
 
				+         cblock(2,5) = cblock(2,5) - ablock(2,1)*bblock(1,5)
			
 
				+                                   - ablock(2,2)*bblock(2,5)
			
 
				+                                   - ablock(2,3)*bblock(3,5)
			
 
				+                                   - ablock(2,4)*bblock(4,5)
			
 
				+                                   - ablock(2,5)*bblock(5,5);
			
 
				+         cblock(3,5) = cblock(3,5) - ablock(3,1)*bblock(1,5)
			
 
				+                                   - ablock(3,2)*bblock(2,5)
			
 
				+                                   - ablock(3,3)*bblock(3,5)
			
 
				+                                   - ablock(3,4)*bblock(4,5)
			
 
				+                                   - ablock(3,5)*bblock(5,5);
			
 
				+         cblock(4,5) = cblock(4,5) - ablock(4,1)*bblock(1,5)
			
 
				+                                   - ablock(4,2)*bblock(2,5)
			
 
				+                                   - ablock(4,3)*bblock(3,5)
			
 
				+                                   - ablock(4,4)*bblock(4,5)
			
 
				+                                   - ablock(4,5)*bblock(5,5);
			
 
				+         cblock(5,5) = cblock(5,5) - ablock(5,1)*bblock(1,5)
			
 
				+                                   - ablock(5,2)*bblock(2,5)
			
 
				+                                   - ablock(5,3)*bblock(3,5)
			
 
				+                                   - ablock(5,4)*bblock(4,5)
			
 
				+                                   - ablock(5,5)*bblock(5,5);
			
 
				+
			
 
				+              
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+void binvcrhs( double lhs[],double c[],double r[] ) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     In-place, fully unrolled Gauss-Jordan elimination on a 5x5 block.
			
 
				+//     For each pivot k = 1..5 the pivot row of lhs, c and r is scaled by
			
 
				+//     1/lhs(k,k); lhs(i,k) times that row is then subtracted from every
			
 
				+//     other row i, so c and r receive every row operation applied to lhs.
			
 
				+//---------------------------------------------------------------------
			
 
				+//     lhs(i,j), c(i,j), r(i) are accessors defined outside this function (presumably macros over the flat arrays - confirm).
			
 
				+      double pivot, coeff; // pivot: reciprocal of the diagonal entry; coeff: multiplier of the pivot row
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     Pivot 1: scale row 1 by 1/lhs(1,1), then reduce rows 2..5 with it.
			
 
				+//---------------------------------------------------------------------
			
 
				+//     lhs(k,k) and column k are never rewritten (nor read again); no row interchange or zero-pivot test is done.
			
 
				+      pivot = 1.00e0/lhs(1,1);
			
 
				+      lhs(1,2) = lhs(1,2)*pivot;
			
 
				+      lhs(1,3) = lhs(1,3)*pivot;
			
 
				+      lhs(1,4) = lhs(1,4)*pivot;
			
 
				+      lhs(1,5) = lhs(1,5)*pivot;
			
 
				+      c(1,1) = c(1,1)*pivot;
			
 
				+      c(1,2) = c(1,2)*pivot;
			
 
				+      c(1,3) = c(1,3)*pivot;
			
 
				+      c(1,4) = c(1,4)*pivot;
			
 
				+      c(1,5) = c(1,5)*pivot;
			
 
				+      r(1)   = r(1)  *pivot;
			
 
				+
			
 
				+      coeff = lhs(2,1);
			
 
				+      lhs(2,2)= lhs(2,2) - coeff*lhs(1,2);
			
 
				+      lhs(2,3)= lhs(2,3) - coeff*lhs(1,3);
			
 
				+      lhs(2,4)= lhs(2,4) - coeff*lhs(1,4);
			
 
				+      lhs(2,5)= lhs(2,5) - coeff*lhs(1,5);
			
 
				+      c(2,1) = c(2,1) - coeff*c(1,1);
			
 
				+      c(2,2) = c(2,2) - coeff*c(1,2);
			
 
				+      c(2,3) = c(2,3) - coeff*c(1,3);
			
 
				+      c(2,4) = c(2,4) - coeff*c(1,4);
			
 
				+      c(2,5) = c(2,5) - coeff*c(1,5);
			
 
				+      r(2)   = r(2)   - coeff*r(1);
			
 
				+
			
 
				+      coeff = lhs(3,1);
			
 
				+      lhs(3,2)= lhs(3,2) - coeff*lhs(1,2);
			
 
				+      lhs(3,3)= lhs(3,3) - coeff*lhs(1,3);
			
 
				+      lhs(3,4)= lhs(3,4) - coeff*lhs(1,4);
			
 
				+      lhs(3,5)= lhs(3,5) - coeff*lhs(1,5);
			
 
				+      c(3,1) = c(3,1) - coeff*c(1,1);
			
 
				+      c(3,2) = c(3,2) - coeff*c(1,2);
			
 
				+      c(3,3) = c(3,3) - coeff*c(1,3);
			
 
				+      c(3,4) = c(3,4) - coeff*c(1,4);
			
 
				+      c(3,5) = c(3,5) - coeff*c(1,5);
			
 
				+      r(3)   = r(3)   - coeff*r(1);
			
 
				+
			
 
				+      coeff = lhs(4,1);
			
 
				+      lhs(4,2)= lhs(4,2) - coeff*lhs(1,2);
			
 
				+      lhs(4,3)= lhs(4,3) - coeff*lhs(1,3);
			
 
				+      lhs(4,4)= lhs(4,4) - coeff*lhs(1,4);
			
 
				+      lhs(4,5)= lhs(4,5) - coeff*lhs(1,5);
			
 
				+      c(4,1) = c(4,1) - coeff*c(1,1);
			
 
				+      c(4,2) = c(4,2) - coeff*c(1,2);
			
 
				+      c(4,3) = c(4,3) - coeff*c(1,3);
			
 
				+      c(4,4) = c(4,4) - coeff*c(1,4);
			
 
				+      c(4,5) = c(4,5) - coeff*c(1,5);
			
 
				+      r(4)   = r(4)   - coeff*r(1);
			
 
				+
			
 
				+      coeff = lhs(5,1);
			
 
				+      lhs(5,2)= lhs(5,2) - coeff*lhs(1,2);
			
 
				+      lhs(5,3)= lhs(5,3) - coeff*lhs(1,3);
			
 
				+      lhs(5,4)= lhs(5,4) - coeff*lhs(1,4);
			
 
				+      lhs(5,5)= lhs(5,5) - coeff*lhs(1,5);
			
 
				+      c(5,1) = c(5,1) - coeff*c(1,1);
			
 
				+      c(5,2) = c(5,2) - coeff*c(1,2);
			
 
				+      c(5,3) = c(5,3) - coeff*c(1,3);
			
 
				+      c(5,4) = c(5,4) - coeff*c(1,4);
			
 
				+      c(5,5) = c(5,5) - coeff*c(1,5);
			
 
				+      r(5)   = r(5)   - coeff*r(1);
			
 
				+
			
 
				+//     Pivot 2: same pattern; only lhs columns 3..5 still need updating.
			
 
				+      pivot = 1.00e0/lhs(2,2);
			
 
				+      lhs(2,3) = lhs(2,3)*pivot;
			
 
				+      lhs(2,4) = lhs(2,4)*pivot;
			
 
				+      lhs(2,5) = lhs(2,5)*pivot;
			
 
				+      c(2,1) = c(2,1)*pivot;
			
 
				+      c(2,2) = c(2,2)*pivot;
			
 
				+      c(2,3) = c(2,3)*pivot;
			
 
				+      c(2,4) = c(2,4)*pivot;
			
 
				+      c(2,5) = c(2,5)*pivot;
			
 
				+      r(2)   = r(2)  *pivot;
			
 
				+
			
 
				+      coeff = lhs(1,2);
			
 
				+      lhs(1,3)= lhs(1,3) - coeff*lhs(2,3);
			
 
				+      lhs(1,4)= lhs(1,4) - coeff*lhs(2,4);
			
 
				+      lhs(1,5)= lhs(1,5) - coeff*lhs(2,5);
			
 
				+      c(1,1) = c(1,1) - coeff*c(2,1);
			
 
				+      c(1,2) = c(1,2) - coeff*c(2,2);
			
 
				+      c(1,3) = c(1,3) - coeff*c(2,3);
			
 
				+      c(1,4) = c(1,4) - coeff*c(2,4);
			
 
				+      c(1,5) = c(1,5) - coeff*c(2,5);
			
 
				+      r(1)   = r(1)   - coeff*r(2);
			
 
				+
			
 
				+      coeff = lhs(3,2);
			
 
				+      lhs(3,3)= lhs(3,3) - coeff*lhs(2,3);
			
 
				+      lhs(3,4)= lhs(3,4) - coeff*lhs(2,4);
			
 
				+      lhs(3,5)= lhs(3,5) - coeff*lhs(2,5);
			
 
				+      c(3,1) = c(3,1) - coeff*c(2,1);
			
 
				+      c(3,2) = c(3,2) - coeff*c(2,2);
			
 
				+      c(3,3) = c(3,3) - coeff*c(2,3);
			
 
				+      c(3,4) = c(3,4) - coeff*c(2,4);
			
 
				+      c(3,5) = c(3,5) - coeff*c(2,5);
			
 
				+      r(3)   = r(3)   - coeff*r(2);
			
 
				+
			
 
				+      coeff = lhs(4,2);
			
 
				+      lhs(4,3)= lhs(4,3) - coeff*lhs(2,3);
			
 
				+      lhs(4,4)= lhs(4,4) - coeff*lhs(2,4);
			
 
				+      lhs(4,5)= lhs(4,5) - coeff*lhs(2,5);
			
 
				+      c(4,1) = c(4,1) - coeff*c(2,1);
			
 
				+      c(4,2) = c(4,2) - coeff*c(2,2);
			
 
				+      c(4,3) = c(4,3) - coeff*c(2,3);
			
 
				+      c(4,4) = c(4,4) - coeff*c(2,4);
			
 
				+      c(4,5) = c(4,5) - coeff*c(2,5);
			
 
				+      r(4)   = r(4)   - coeff*r(2);
			
 
				+
			
 
				+      coeff = lhs(5,2);
			
 
				+      lhs(5,3)= lhs(5,3) - coeff*lhs(2,3);
			
 
				+      lhs(5,4)= lhs(5,4) - coeff*lhs(2,4);
			
 
				+      lhs(5,5)= lhs(5,5) - coeff*lhs(2,5);
			
 
				+      c(5,1) = c(5,1) - coeff*c(2,1);
			
 
				+      c(5,2) = c(5,2) - coeff*c(2,2);
			
 
				+      c(5,3) = c(5,3) - coeff*c(2,3);
			
 
				+      c(5,4) = c(5,4) - coeff*c(2,4);
			
 
				+      c(5,5) = c(5,5) - coeff*c(2,5);
			
 
				+      r(5)   = r(5)   - coeff*r(2);
			
 
				+
			
 
				+//     Pivot 3: only lhs columns 4..5 still need updating.
			
 
				+      pivot = 1.00e0/lhs(3,3);
			
 
				+      lhs(3,4) = lhs(3,4)*pivot;
			
 
				+      lhs(3,5) = lhs(3,5)*pivot;
			
 
				+      c(3,1) = c(3,1)*pivot;
			
 
				+      c(3,2) = c(3,2)*pivot;
			
 
				+      c(3,3) = c(3,3)*pivot;
			
 
				+      c(3,4) = c(3,4)*pivot;
			
 
				+      c(3,5) = c(3,5)*pivot;
			
 
				+      r(3)   = r(3)  *pivot;
			
 
				+
			
 
				+      coeff = lhs(1,3);
			
 
				+      lhs(1,4)= lhs(1,4) - coeff*lhs(3,4);
			
 
				+      lhs(1,5)= lhs(1,5) - coeff*lhs(3,5);
			
 
				+      c(1,1) = c(1,1) - coeff*c(3,1);
			
 
				+      c(1,2) = c(1,2) - coeff*c(3,2);
			
 
				+      c(1,3) = c(1,3) - coeff*c(3,3);
			
 
				+      c(1,4) = c(1,4) - coeff*c(3,4);
			
 
				+      c(1,5) = c(1,5) - coeff*c(3,5);
			
 
				+      r(1)   = r(1)   - coeff*r(3);
			
 
				+
			
 
				+      coeff = lhs(2,3);
			
 
				+      lhs(2,4)= lhs(2,4) - coeff*lhs(3,4);
			
 
				+      lhs(2,5)= lhs(2,5) - coeff*lhs(3,5);
			
 
				+      c(2,1) = c(2,1) - coeff*c(3,1);
			
 
				+      c(2,2) = c(2,2) - coeff*c(3,2);
			
 
				+      c(2,3) = c(2,3) - coeff*c(3,3);
			
 
				+      c(2,4) = c(2,4) - coeff*c(3,4);
			
 
				+      c(2,5) = c(2,5) - coeff*c(3,5);
			
 
				+      r(2)   = r(2)   - coeff*r(3);
			
 
				+
			
 
				+      coeff = lhs(4,3);
			
 
				+      lhs(4,4)= lhs(4,4) - coeff*lhs(3,4);
			
 
				+      lhs(4,5)= lhs(4,5) - coeff*lhs(3,5);
			
 
				+      c(4,1) = c(4,1) - coeff*c(3,1);
			
 
				+      c(4,2) = c(4,2) - coeff*c(3,2);
			
 
				+      c(4,3) = c(4,3) - coeff*c(3,3);
			
 
				+      c(4,4) = c(4,4) - coeff*c(3,4);
			
 
				+      c(4,5) = c(4,5) - coeff*c(3,5);
			
 
				+      r(4)   = r(4)   - coeff*r(3);
			
 
				+
			
 
				+      coeff = lhs(5,3);
			
 
				+      lhs(5,4)= lhs(5,4) - coeff*lhs(3,4);
			
 
				+      lhs(5,5)= lhs(5,5) - coeff*lhs(3,5);
			
 
				+      c(5,1) = c(5,1) - coeff*c(3,1);
			
 
				+      c(5,2) = c(5,2) - coeff*c(3,2);
			
 
				+      c(5,3) = c(5,3) - coeff*c(3,3);
			
 
				+      c(5,4) = c(5,4) - coeff*c(3,4);
			
 
				+      c(5,5) = c(5,5) - coeff*c(3,5);
			
 
				+      r(5)   = r(5)   - coeff*r(3);
			
 
				+
			
 
				+//     Pivot 4: only lhs column 5 still needs updating.
			
 
				+      pivot = 1.00e0/lhs(4,4);
			
 
				+      lhs(4,5) = lhs(4,5)*pivot;
			
 
				+      c(4,1) = c(4,1)*pivot;
			
 
				+      c(4,2) = c(4,2)*pivot;
			
 
				+      c(4,3) = c(4,3)*pivot;
			
 
				+      c(4,4) = c(4,4)*pivot;
			
 
				+      c(4,5) = c(4,5)*pivot;
			
 
				+      r(4)   = r(4)  *pivot;
			
 
				+
			
 
				+      coeff = lhs(1,4);
			
 
				+      lhs(1,5)= lhs(1,5) - coeff*lhs(4,5);
			
 
				+      c(1,1) = c(1,1) - coeff*c(4,1);
			
 
				+      c(1,2) = c(1,2) - coeff*c(4,2);
			
 
				+      c(1,3) = c(1,3) - coeff*c(4,3);
			
 
				+      c(1,4) = c(1,4) - coeff*c(4,4);
			
 
				+      c(1,5) = c(1,5) - coeff*c(4,5);
			
 
				+      r(1)   = r(1)   - coeff*r(4);
			
 
				+
			
 
				+      coeff = lhs(2,4);
			
 
				+      lhs(2,5)= lhs(2,5) - coeff*lhs(4,5);
			
 
				+      c(2,1) = c(2,1) - coeff*c(4,1);
			
 
				+      c(2,2) = c(2,2) - coeff*c(4,2);
			
 
				+      c(2,3) = c(2,3) - coeff*c(4,3);
			
 
				+      c(2,4) = c(2,4) - coeff*c(4,4);
			
 
				+      c(2,5) = c(2,5) - coeff*c(4,5);
			
 
				+      r(2)   = r(2)   - coeff*r(4);
			
 
				+
			
 
				+      coeff = lhs(3,4);
			
 
				+      lhs(3,5)= lhs(3,5) - coeff*lhs(4,5);
			
 
				+      c(3,1) = c(3,1) - coeff*c(4,1);
			
 
				+      c(3,2) = c(3,2) - coeff*c(4,2);
			
 
				+      c(3,3) = c(3,3) - coeff*c(4,3);
			
 
				+      c(3,4) = c(3,4) - coeff*c(4,4);
			
 
				+      c(3,5) = c(3,5) - coeff*c(4,5);
			
 
				+      r(3)   = r(3)   - coeff*r(4);
			
 
				+
			
 
				+      coeff = lhs(5,4);
			
 
				+      lhs(5,5)= lhs(5,5) - coeff*lhs(4,5);
			
 
				+      c(5,1) = c(5,1) - coeff*c(4,1);
			
 
				+      c(5,2) = c(5,2) - coeff*c(4,2);
			
 
				+      c(5,3) = c(5,3) - coeff*c(4,3);
			
 
				+      c(5,4) = c(5,4) - coeff*c(4,4);
			
 
				+      c(5,5) = c(5,5) - coeff*c(4,5);
			
 
				+      r(5)   = r(5)   - coeff*r(4);
			
 
				+
			
 
				+//     Pivot 5: no lhs columns remain, so only c and r are updated.
			
 
				+      pivot = 1.00e0/lhs(5,5);
			
 
				+      c(5,1) = c(5,1)*pivot;
			
 
				+      c(5,2) = c(5,2)*pivot;
			
 
				+      c(5,3) = c(5,3)*pivot;
			
 
				+      c(5,4) = c(5,4)*pivot;
			
 
				+      c(5,5) = c(5,5)*pivot;
			
 
				+      r(5)   = r(5)  *pivot;
			
 
				+
			
 
				+      coeff = lhs(1,5);
			
 
				+      c(1,1) = c(1,1) - coeff*c(5,1);
			
 
				+      c(1,2) = c(1,2) - coeff*c(5,2);
			
 
				+      c(1,3) = c(1,3) - coeff*c(5,3);
			
 
				+      c(1,4) = c(1,4) - coeff*c(5,4);
			
 
				+      c(1,5) = c(1,5) - coeff*c(5,5);
			
 
				+      r(1)   = r(1)   - coeff*r(5);
			
 
				+
			
 
				+      coeff = lhs(2,5);
			
 
				+      c(2,1) = c(2,1) - coeff*c(5,1);
			
 
				+      c(2,2) = c(2,2) - coeff*c(5,2);
			
 
				+      c(2,3) = c(2,3) - coeff*c(5,3);
			
 
				+      c(2,4) = c(2,4) - coeff*c(5,4);
			
 
				+      c(2,5) = c(2,5) - coeff*c(5,5);
			
 
				+      r(2)   = r(2)   - coeff*r(5);
			
 
				+
			
 
				+      coeff = lhs(3,5);
			
 
				+      c(3,1) = c(3,1) - coeff*c(5,1);
			
 
				+      c(3,2) = c(3,2) - coeff*c(5,2);
			
 
				+      c(3,3) = c(3,3) - coeff*c(5,3);
			
 
				+      c(3,4) = c(3,4) - coeff*c(5,4);
			
 
				+      c(3,5) = c(3,5) - coeff*c(5,5);
			
 
				+      r(3)   = r(3)   - coeff*r(5);
			
 
				+
			
 
				+      coeff = lhs(4,5);
			
 
				+      c(4,1) = c(4,1) - coeff*c(5,1);
			
 
				+      c(4,2) = c(4,2) - coeff*c(5,2);
			
 
				+      c(4,3) = c(4,3) - coeff*c(5,3);
			
 
				+      c(4,4) = c(4,4) - coeff*c(5,4);
			
 
				+      c(4,5) = c(4,5) - coeff*c(5,5);
			
 
				+      r(4)   = r(4)   - coeff*r(5);
			
 
				+
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+void binvrhs( double lhs[],double r[] ) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     In-place, fully unrolled Gauss-Jordan elimination on a 5x5 block
			
 
				+//     with a single right-hand side: for each pivot k = 1..5, row k of
			
 
				+//     lhs and r(k) are scaled by 1/lhs(k,k), then lhs(i,k) times row k
			
 
				+//     is subtracted from every other row i of lhs and from r(i).
			
 
				+//---------------------------------------------------------------------
			
 
				+//     lhs(i,j) and r(i) are accessors defined outside this function (presumably macros over the flat arrays - confirm).
			
 
				+      double pivot, coeff; // pivot: reciprocal of the diagonal entry; coeff: multiplier of the pivot row
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     Pivot 1: scale row 1 by 1/lhs(1,1), then reduce rows 2..5 with it.
			
 
				+//---------------------------------------------------------------------
			
 
				+//     lhs(k,k) and column k are never rewritten (nor read again); no row interchange or zero-pivot test is done.
			
 
				+
			
 
				+      pivot = 1.00e0/lhs(1,1);
			
 
				+      lhs(1,2) = lhs(1,2)*pivot;
			
 
				+      lhs(1,3) = lhs(1,3)*pivot;
			
 
				+      lhs(1,4) = lhs(1,4)*pivot;
			
 
				+      lhs(1,5) = lhs(1,5)*pivot;
			
 
				+      r(1)   = r(1)  *pivot;
			
 
				+
			
 
				+      coeff = lhs(2,1);
			
 
				+      lhs(2,2)= lhs(2,2) - coeff*lhs(1,2);
			
 
				+      lhs(2,3)= lhs(2,3) - coeff*lhs(1,3);
			
 
				+      lhs(2,4)= lhs(2,4) - coeff*lhs(1,4);
			
 
				+      lhs(2,5)= lhs(2,5) - coeff*lhs(1,5);
			
 
				+      r(2)   = r(2)   - coeff*r(1);
			
 
				+
			
 
				+      coeff = lhs(3,1);
			
 
				+      lhs(3,2)= lhs(3,2) - coeff*lhs(1,2);
			
 
				+      lhs(3,3)= lhs(3,3) - coeff*lhs(1,3);
			
 
				+      lhs(3,4)= lhs(3,4) - coeff*lhs(1,4);
			
 
				+      lhs(3,5)= lhs(3,5) - coeff*lhs(1,5);
			
 
				+      r(3)   = r(3)   - coeff*r(1);
			
 
				+
			
 
				+      coeff = lhs(4,1);
			
 
				+      lhs(4,2)= lhs(4,2) - coeff*lhs(1,2);
			
 
				+      lhs(4,3)= lhs(4,3) - coeff*lhs(1,3);
			
 
				+      lhs(4,4)= lhs(4,4) - coeff*lhs(1,4);
			
 
				+      lhs(4,5)= lhs(4,5) - coeff*lhs(1,5);
			
 
				+      r(4)   = r(4)   - coeff*r(1);
			
 
				+
			
 
				+      coeff = lhs(5,1);
			
 
				+      lhs(5,2)= lhs(5,2) - coeff*lhs(1,2);
			
 
				+      lhs(5,3)= lhs(5,3) - coeff*lhs(1,3);
			
 
				+      lhs(5,4)= lhs(5,4) - coeff*lhs(1,4);
			
 
				+      lhs(5,5)= lhs(5,5) - coeff*lhs(1,5);
			
 
				+      r(5)   = r(5)   - coeff*r(1);
			
 
				+
			
 
				+//     Pivot 2: same pattern; only lhs columns 3..5 still need updating.
			
 
				+      pivot = 1.00e0/lhs(2,2);
			
 
				+      lhs(2,3) = lhs(2,3)*pivot;
			
 
				+      lhs(2,4) = lhs(2,4)*pivot;
			
 
				+      lhs(2,5) = lhs(2,5)*pivot;
			
 
				+      r(2)   = r(2)  *pivot;
			
 
				+
			
 
				+      coeff = lhs(1,2);
			
 
				+      lhs(1,3)= lhs(1,3) - coeff*lhs(2,3);
			
 
				+      lhs(1,4)= lhs(1,4) - coeff*lhs(2,4);
			
 
				+      lhs(1,5)= lhs(1,5) - coeff*lhs(2,5);
			
 
				+      r(1)   = r(1)   - coeff*r(2);
			
 
				+
			
 
				+      coeff = lhs(3,2);
			
 
				+      lhs(3,3)= lhs(3,3) - coeff*lhs(2,3);
			
 
				+      lhs(3,4)= lhs(3,4) - coeff*lhs(2,4);
			
 
				+      lhs(3,5)= lhs(3,5) - coeff*lhs(2,5);
			
 
				+      r(3)   = r(3)   - coeff*r(2);
			
 
				+
			
 
				+      coeff = lhs(4,2);
			
 
				+      lhs(4,3)= lhs(4,3) - coeff*lhs(2,3);
			
 
				+      lhs(4,4)= lhs(4,4) - coeff*lhs(2,4);
			
 
				+      lhs(4,5)= lhs(4,5) - coeff*lhs(2,5);
			
 
				+      r(4)   = r(4)   - coeff*r(2);
			
 
				+
			
 
				+      coeff = lhs(5,2);
			
 
				+      lhs(5,3)= lhs(5,3) - coeff*lhs(2,3);
			
 
				+      lhs(5,4)= lhs(5,4) - coeff*lhs(2,4);
			
 
				+      lhs(5,5)= lhs(5,5) - coeff*lhs(2,5);
			
 
				+      r(5)   = r(5)   - coeff*r(2);
			
 
				+
			
 
				+//     Pivot 3: only lhs columns 4..5 still need updating.
			
 
				+      pivot = 1.00e0/lhs(3,3);
			
 
				+      lhs(3,4) = lhs(3,4)*pivot;
			
 
				+      lhs(3,5) = lhs(3,5)*pivot;
			
 
				+      r(3)   = r(3)  *pivot;
			
 
				+
			
 
				+      coeff = lhs(1,3);
			
 
				+      lhs(1,4)= lhs(1,4) - coeff*lhs(3,4);
			
 
				+      lhs(1,5)= lhs(1,5) - coeff*lhs(3,5);
			
 
				+      r(1)   = r(1)   - coeff*r(3);
			
 
				+
			
 
				+      coeff = lhs(2,3);
			
 
				+      lhs(2,4)= lhs(2,4) - coeff*lhs(3,4);
			
 
				+      lhs(2,5)= lhs(2,5) - coeff*lhs(3,5);
			
 
				+      r(2)   = r(2)   - coeff*r(3);
			
 
				+
			
 
				+      coeff = lhs(4,3);
			
 
				+      lhs(4,4)= lhs(4,4) - coeff*lhs(3,4);
			
 
				+      lhs(4,5)= lhs(4,5) - coeff*lhs(3,5);
			
 
				+      r(4)   = r(4)   - coeff*r(3);
			
 
				+
			
 
				+      coeff = lhs(5,3);
			
 
				+      lhs(5,4)= lhs(5,4) - coeff*lhs(3,4);
			
 
				+      lhs(5,5)= lhs(5,5) - coeff*lhs(3,5);
			
 
				+      r(5)   = r(5)   - coeff*r(3);
			
 
				+
			
 
				+//     Pivot 4: only lhs column 5 still needs updating.
			
 
				+      pivot = 1.00e0/lhs(4,4);
			
 
				+      lhs(4,5) = lhs(4,5)*pivot;
			
 
				+      r(4)   = r(4)  *pivot;
			
 
				+
			
 
				+      coeff = lhs(1,4);
			
 
				+      lhs(1,5)= lhs(1,5) - coeff*lhs(4,5);
			
 
				+      r(1)   = r(1)   - coeff*r(4);
			
 
				+
			
 
				+      coeff = lhs(2,4);
			
 
				+      lhs(2,5)= lhs(2,5) - coeff*lhs(4,5);
			
 
				+      r(2)   = r(2)   - coeff*r(4);
			
 
				+
			
 
				+      coeff = lhs(3,4);
			
 
				+      lhs(3,5)= lhs(3,5) - coeff*lhs(4,5);
			
 
				+      r(3)   = r(3)   - coeff*r(4);
			
 
				+
			
 
				+      coeff = lhs(5,4);
			
 
				+      lhs(5,5)= lhs(5,5) - coeff*lhs(4,5);
			
 
				+      r(5)   = r(5)   - coeff*r(4);
			
 
				+
			
 
				+//     Pivot 5: no lhs columns remain, so only r is updated.
			
 
				+      pivot = 1.00e0/lhs(5,5);
			
 
				+      r(5)   = r(5)  *pivot;
			
 
				+
			
 
				+      coeff = lhs(1,5);
			
 
				+      r(1)   = r(1)   - coeff*r(5);
			
 
				+
			
 
				+      coeff = lhs(2,5);
			
 
				+      r(2)   = r(2)   - coeff*r(5);
			
 
				+
			
 
				+      coeff = lhs(3,5);
			
 
				+      r(3)   = r(3)   - coeff*r(5);
			
 
				+
			
 
				+      coeff = lhs(4,5);
			
 
				+      r(4)   = r(4)   - coeff*r(5);
			
 
				+
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/timers.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/timers.c.svn-base
@@ -0,0 +1,59 @@
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+#include "RCCE.h" 
			
 
				+#ifdef _OPENMP
			
 
				+#include "omp.h"
			
 
				+#endif
			
 
				+#include "timers.h"
			
 
				+#define elapsed(n) elapsed[n-1]
			
 
				+#define start_time(n)   start_time[n-1]
			
 
				+// elapsed(n)/start_time(n) map a 1-based timer number onto slot n-1 of the arrays; n is not parenthesized, so pass a simple expression.
			
 
				+void timer_clear(int np){
			
 
				+      // Zero the accumulated time of timer np. np counts from 1 (elapsed(n) expands to elapsed[n-1]) and is not range-checked.
			
 
				+      int n = np;
			
 
				+      elapsed(n) = 0.0;
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+void timer_start(int np) {
			
 
				+      // Store the current RCCE_wtime() reading as the start stamp of timer np (counts from 1, not range-checked).
			
 
				+      int n = np;
			
 
				+
			
 
				+      start_time(n) = RCCE_wtime();
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+void timer_stop(int np) {
			
 
				+      // Add the time since the start stamp written by timer_start(np) to the running total of timer np.
			
 
				+      int n = np;
			
 
				+
			
 
				+      double t, now;
			
 
				+      now = RCCE_wtime();
			
 
				+      t = now - start_time(n);
			
 
				+      elapsed(n) = elapsed(n) + t;
			
 
				+      // The total accumulates across start/stop pairs; only timer_clear resets it.
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+double timer_read(int np) {
			
 
				+      // Return the accumulated time of timer np (counts from 1, not range-checked); the timer state is left unchanged.
			
 
				+      int n = np;      
			
 
				+      return( elapsed(n));
			
 
				+}
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/timers.h.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/timers.h.svn-base
@@ -0,0 +1,4 @@
 
				+double start_time[64], elapsed[64]; // 64 start stamps and 64 accumulated totals. NOTE(review): defined (not extern) in a header - confirm only one translation unit includes it.
			
 
				+#ifdef _OPENMP
			
 
				+#pragma omp threadprivate (start_time, elapsed)
			
 
				+#endif
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/verify.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/verify.c.svn-base
@@ -0,0 +1,380 @@
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include <stdio.h>
			
 
				+#include "header.h"
			
 
				+#include "mpinpb.h"
			
 
				+
			
 
				+#define FABS(x) ((x)>0 ? (x) : -(x)) // absolute value; x is evaluated twice, so avoid arguments with side effects
			
 
				+void verify(int no_time_steps, char *class_r, int *verified_r) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//  verification routine                         
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+        double xcrref[5],xceref[5],xcrdif[5],xcedif[5], 
			
 
				+               epsilon, xce[5], xcr[5], dtref;
			
 
				+        int m;
			
 
				+        char class;
			
 
				+        int verified;
			
 
				+#define xcrref(m) xcrref[m-1]
			
 
				+#define xceref(m) xceref[m-1]
			
 
				+#define xcrdif(m) xcrdif[m-1]
			
 
				+#define xcedif(m) xcedif[m-1]
			
 
				+#define xce(m) xce[m-1]
			
 
				+#define xcr(m) xcr[m-1]
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//   tolerance level
			
 
				+//---------------------------------------------------------------------
			
 
				+        epsilon = 1.0e-08;
			
 
				+        verified = 1;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//   compute the error norm and the residual norm, and exit if not printing
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+        error_norm(xce);
			
 
				+
			
 
				+        copy_faces();
			
 
				+
			
 
				+        rhs_norm(xcr);
			
 
				+
			
 
				+        for (m = 1; m <= 5; m++) {
			
 
				+           xcr(m) = xcr(m) / dt;
			
 
				+        }
			
 
				+
			
 
				+        if (node != root) return;
			
 
				+
			
 
				+        class = 'U';
			
 
				+
			
 
				+        for (m = 1; m <= 5; m++) {
			
 
				+           xcrref(m) = 1.0;
			
 
				+           xceref(m) = 1.0;
			
 
				+        }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//    reference data for 12X12X12 grids after 60 time steps, with DT = 1.0e-02
			
 
				+//---------------------------------------------------------------------
			
 
				+        if ( (grid_points(1)  == 12     ) && 
			
 
				+             (grid_points(2)  == 12     ) &&
			
 
				+             (grid_points(3)  == 12     ) &&
			
 
				+             (no_time_steps   == 60    )) {
			
 
				+
			
 
				+           class = 'S';
			
 
				+           dtref = 1.0e-2;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//  Reference values of RMS-norms of residual.
			
 
				+//---------------------------------------------------------------------
			
 
				+         xcrref(1) = 1.7034283709541311e-01;
			
 
				+         xcrref(2) = 1.2975252070034097e-02;
			
 
				+         xcrref(3) = 3.2527926989486055e-02;
			
 
				+         xcrref(4) = 2.6436421275166801e-02;
			
 
				+         xcrref(5) = 1.9211784131744430e-01;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//  Reference values of RMS-norms of solution error.
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+           xceref(1) = 4.9976913345811579e-04;
			
 
				+           xceref(2) = 4.5195666782961927e-05;
			
 
				+           xceref(3) = 7.3973765172921357e-05;
			
 
				+           xceref(4) = 7.3821238632439731e-05;
			
 
				+           xceref(5) = 8.9269630987491446e-04;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//    reference data for 24X24X24 grids after 200 time steps, with DT = 0.8e-3
			
 
				+//---------------------------------------------------------------------
			
 
				+        } else if ( (grid_points(1) == 24) && 
			
 
				+                 (grid_points(2) == 24) &&
			
 
				+                 (grid_points(3) == 24) &&
			
 
				+                 (no_time_steps == 200) ) {
			
 
				+
			
 
				+           class = 'W';
			
 
				+           dtref = 0.8e-3;
			
 
				+//---------------------------------------------------------------------
			
 
				+//  Reference values of RMS-norms of residual.
			
 
				+//---------------------------------------------------------------------
			
 
				+           xcrref(1) = 0.1125590409344e+03;
			
 
				+           xcrref(2) = 0.1180007595731e+02;
			
 
				+           xcrref(3) = 0.2710329767846e+02;
			
 
				+           xcrref(4) = 0.2469174937669e+02;
			
 
				+           xcrref(5) = 0.2638427874317e+03;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//  Reference values of RMS-norms of solution error.
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+           xceref(1) = 0.4419655736008e+01;
			
 
				+           xceref(2) = 0.4638531260002e+00;
			
 
				+           xceref(3) = 0.1011551749967e+01;
			
 
				+           xceref(4) = 0.9235878729944e+00;
			
 
				+           xceref(5) = 0.1018045837718e+02;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//    reference data for 64X64X64 grids after 200 time steps, with DT = 0.8e-3
			
 
				+//---------------------------------------------------------------------
			
 
				+        } else if ( (grid_points(1) == 64) && 
			
 
				+                 (grid_points(2) == 64) &&
			
 
				+                 (grid_points(3) == 64) &&
			
 
				+                 (no_time_steps == 200) ) {
			
 
				+
			
 
				+           class = 'A';
			
 
				+           dtref = 0.8e-3;
			
 
				+//---------------------------------------------------------------------
			
 
				+//  Reference values of RMS-norms of residual.
			
 
				+//---------------------------------------------------------------------
			
 
				+         xcrref(1) = 1.0806346714637264e+02;
			
 
				+         xcrref(2) = 1.1319730901220813e+01;
			
 
				+         xcrref(3) = 2.5974354511582465e+01;
			
 
				+         xcrref(4) = 2.3665622544678910e+01;
			
 
				+         xcrref(5) = 2.5278963211748344e+02;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//  Reference values of RMS-norms of solution error.
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+           xceref(1) = 4.2348416040525025e+00;
			
 
				+           xceref(2) = 4.4390282496995698e-01;
			
 
				+           xceref(3) = 9.6692480136345650e-01;
			
 
				+           xceref(4) = 8.8302063039765474e-01;
			
 
				+           xceref(5) = 9.7379901770829278e+00;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//    reference data for 102X102X102 grids after 200 time steps,
			
 
				+//    with DT = 3.0e-04
			
 
				+//---------------------------------------------------------------------
			
 
				+        } else if ( (grid_points(1) == 102) && 
			
 
				+                 (grid_points(2) == 102) &&
			
 
				+                 (grid_points(3) == 102) &&
			
 
				+                 (no_time_steps == 200) ) {
			
 
				+
			
 
				+           class = 'B';
			
 
				+           dtref = 3.0e-4;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//  Reference values of RMS-norms of residual.
			
 
				+//---------------------------------------------------------------------
			
 
				+         xcrref(1) = 1.4233597229287254e+03;
			
 
				+         xcrref(2) = 9.9330522590150238e+01;
			
 
				+         xcrref(3) = 3.5646025644535285e+02;
			
 
				+         xcrref(4) = 3.2485447959084092e+02;
			
 
				+         xcrref(5) = 3.2707541254659363e+03;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//  Reference values of RMS-norms of solution error.
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+           xceref(1) = 5.2969847140936856e+01;
			
 
				+           xceref(2) = 4.4632896115670668e+00;
			
 
				+           xceref(3) = 1.3122573342210174e+01;
			
 
				+           xceref(4) = 1.2006925323559144e+01;
			
 
				+           xceref(5) = 1.2459576151035986e+02;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//    reference data for 162X162X162 grids after 200 time steps,
			
 
				+//    with DT = 1.0e-04
			
 
				+//---------------------------------------------------------------------
			
 
				+        } else if ( (grid_points(1) == 162) && 
			
 
				+                 (grid_points(2) == 162) &&
			
 
				+                 (grid_points(3) == 162) &&
			
 
				+                 (no_time_steps == 200) ) {
			
 
				+
			
 
				+           class = 'C';
			
 
				+           dtref = 1.0e-4;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//  Reference values of RMS-norms of residual.
			
 
				+//---------------------------------------------------------------------
			
 
				+         xcrref(1) = 0.62398116551764615e+04;
			
 
				+         xcrref(2) = 0.50793239190423964e+03;
			
 
				+         xcrref(3) = 0.15423530093013596e+04;
			
 
				+         xcrref(4) = 0.13302387929291190e+04;
			
 
				+         xcrref(5) = 0.11604087428436455e+05;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//  Reference values of RMS-norms of solution error.
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+           xceref(1) = 0.16462008369091265e+03;
			
 
				+           xceref(2) = 0.11497107903824313e+02;
			
 
				+           xceref(3) = 0.41207446207461508e+02;
			
 
				+           xceref(4) = 0.37087651059694167e+02;
			
 
				+           xceref(5) = 0.36211053051841265e+03;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//    reference data for 408x408x408 grids after 250 time steps,
			
 
				+//    with DT = 0.2e-04
			
 
				+//---------------------------------------------------------------------
			
 
				+        } else if ( (grid_points(1) == 408) && 
			
 
				+                 (grid_points(2) == 408) &&
			
 
				+                 (grid_points(3) == 408) &&
			
 
				+                 (no_time_steps == 250) ) {
			
 
				+
			
 
				+           class = 'D';
			
 
				+           dtref = 0.2e-4;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//  Reference values of RMS-norms of residual.
			
 
				+//---------------------------------------------------------------------
			
 
				+         xcrref(1) = 0.2533188551738e+05;
			
 
				+         xcrref(2) = 0.2346393716980e+04;
			
 
				+         xcrref(3) = 0.6294554366904e+04;
			
 
				+         xcrref(4) = 0.5352565376030e+04;
			
 
				+         xcrref(5) = 0.3905864038618e+05;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//  Reference values of RMS-norms of solution error.
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+           xceref(1) = 0.3100009377557e+03;
			
 
				+           xceref(2) = 0.2424086324913e+02;
			
 
				+           xceref(3) = 0.7782212022645e+02;
			
 
				+           xceref(4) = 0.6835623860116e+02;
			
 
				+           xceref(5) = 0.6065737200368e+03;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//    reference data for 1020x1020x1020 grids after 250 time steps,
			
 
				+//    with DT = 0.4e-05
			
 
				+//---------------------------------------------------------------------
			
 
				+        } else if ( (grid_points(1) == 1020) && 
			
 
				+                 (grid_points(2) == 1020) &&
			
 
				+                 (grid_points(3) == 1020) &&
			
 
				+                 (no_time_steps == 250) ) {
			
 
				+
			
 
				+           class = 'E';
			
 
				+           dtref = 0.4e-5;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//  Reference values of RMS-norms of residual.
			
 
				+//---------------------------------------------------------------------
			
 
				+         xcrref(1) = 0.9795372484517e+05;
			
 
				+         xcrref(2) = 0.9739814511521e+04;
			
 
				+         xcrref(3) = 0.2467606342965e+05;
			
 
				+         xcrref(4) = 0.2092419572860e+05;
			
 
				+         xcrref(5) = 0.1392138856939e+06;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//  Reference values of RMS-norms of solution error.
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+           xceref(1) = 0.4327562208414e+03;
			
 
				+           xceref(2) = 0.3699051964887e+02;
			
 
				+           xceref(3) = 0.1089845040954e+03;
			
 
				+           xceref(4) = 0.9462517622043e+02;
			
 
				+           xceref(5) = 0.7765512765309e+03;
			
 
				+
			
 
				+        } else {
			
 
				+           verified = 0;
			
 
				+        }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//    verification test for residuals if gridsize is one of 
			
 
				+//    the defined grid sizes above (class != 'U')
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//    Compute the difference of solution values and the known reference 
			
 
				+//    values.
			
 
				+//---------------------------------------------------------------------
			
 
				+        for (m = 1; m <= 5; m++) {
			
 
				+           
			
 
				+           xcrdif(m) = FABS((xcr(m)-xcrref(m))/xcrref(m)) ;
			
 
				+           xcedif(m) = FABS((xce(m)-xceref(m))/xceref(m));
			
 
				+           
			
 
				+        }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//    Output the comparison of computed results to known cases.
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+        if (class != 'U') {
			
 
				+           printf(" Verification being performed for class %c\n", class);
			
 
				+           printf(" accuracy setting for epsilon = %20.13e\n", epsilon);
			
 
				+           verified = (FABS(dt-dtref) <= epsilon);
			
 
				+           if (!verified) {
			
 
				+              class = 'U';
			
 
				+              printf(" DT does not match the reference value of %15.8e\n", 
			
 
				+                       dtref);
			
 
				+           }
			
 
				+        } else {
			
 
				+           printf(" Unknown class\n");
			
 
				+        }
			
 
				+
			
 
				+
			
 
				+        if (class != 'U') {
			
 
				+           printf(" Comparison of RMS-norms of residual\n");
			
 
				+        } else {
			
 
				+           printf(" RMS-norms of residual\n");
			
 
				+        }
			
 
				+
			
 
				+        for (m = 1; m <= 5; m++) {
			
 
				+           if (class == 'U') {
			
 
				+              printf("          %2d %20.13e\n",
			
 
				+                      m, xcr(m));
			
 
				+           } else if (xcrdif(m) <= epsilon) {
			
 
				+              printf("          %2d %20.13e %20.13e %20.13e\n",
			
 
				+                      m,xcr(m),xcrref(m),xcrdif(m));
			
 
				+           } else {
			
 
				+              verified = 0;
			
 
				+              printf(" FAILURE: %2d %20.13e %20.13e %20.13e\n",
			
 
				+                      m,xcr(m),xcrref(m),xcrdif(m));
			
 
				+           }
			
 
				+        }
			
 
				+
			
 
				+        if (class != 'U') {
			
 
				+           printf(" Comparison of RMS-norms of solution error\n");
			
 
				+        } else {
			
 
				+           printf(" RMS-norms of solution error\n");
			
 
				+        }
			
 
				+        
			
 
				+        for (m = 1; m <= 5; m++) {
			
 
				+           if (class == 'U') {
			
 
				+              printf("          %2d %20.13e\n",
			
 
				+                      m, xce(m));
			
 
				+           } else if (xcedif(m) <= epsilon) {
			
 
				+              printf("          %2d %20.13e %20.13e %20.13e\n",
			
 
				+                      m,xce(m),xceref(m),xcedif(m));
			
 
				+           } else {
			
 
				+              verified = 0;
			
 
				+              printf(" FAILURE: %2d %20.13e %20.13e %20.13e\n",
			
 
				+                      m,xce(m),xceref(m),xcedif(m));
			
 
				+           }
			
 
				+        }
			
 
				+        
			
 
				+        
			
 
				+        if (class == 'U') {
			
 
				+           printf(" No reference values provided\n");
			
 
				+           printf(" No verification performed\n");
			
 
				+        } else if (verified) {
			
 
				+           printf(" Verification Successful\n");
			
 
				+        } else {
			
 
				+           printf(" Verification failed\n");
			
 
				+        }
			
 
				+
			
 
				+        *class_r = class;
			
 
				+        *verified_r = verified;
			
 
				+
			
 
				+        return;
			
 
				+
			
 
				+
			
 
				+}
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/work_lhs.h.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/work_lhs.h.svn-base
@@ -0,0 +1,33 @@
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+//
			
 
				+//  work_lhs.h: flat work arrays fjac, njac, lhsa, lhsb, the scalars
			
 
				+//  tmp1..tmp3, and the (m,n,i) index macros that address the arrays.
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+#ifndef __WORK_LHS_H
			
 
				+#define __WORK_LHS_H
			
 
				+// (m,n,i) -> flat offset; m and n are 1-based, each with a factor of 5.
			
 
				+#define fjac(m,n,i) fjac[(m-1)+5*((n-1)+5*(i+2))]  // i is shifted by 2
			
 
				+#define njac(m,n,i) njac[(m-1)+5*((n-1)+5*(i+2))]  // i is shifted by 2
			
 
				+#define lhsa(m,n,i) lhsa[(m-1)+5*((n-1)+5*(i+1))]  // i is shifted by 1
			
 
				+#define lhsb(m,n,i) lhsb[(m-1)+5*((n-1)+5*(i+1))]  // i is shifted by 1
			
 
				+// NOTE(review): the macro arguments above are expanded without parentheses.
			
 
				+#ifdef G_MAIN  // with G_MAIN defined this header defines the objects
			
 
				+      double fjac[5*5*(MAX_CELL_DIM+4)],  // room for i = -2 .. MAX_CELL_DIM+1
			
 
				+                       njac[5*5*(MAX_CELL_DIM+4)],  // same extent as fjac
			
 
				+                       lhsa[5*5*(MAX_CELL_DIM+2)],  // room for i = -1 .. MAX_CELL_DIM
			
 
				+                       lhsb[5*5*(MAX_CELL_DIM+2)],  // same extent as lhsa
			
 
				+                       tmp1, tmp2, tmp3;
			
 
				+//      (looks like the Fortran original:) common /work_lhs/ fjac, njac, lhsa, lhsb, tmp1, tmp2, tmp3;
			
 
				+#else  // without G_MAIN only extern declarations are emitted
			
 
				+extern double fjac[5*5*(MAX_CELL_DIM+4)],
			
 
				+                       njac[5*5*(MAX_CELL_DIM+4)],
			
 
				+                       lhsa[5*5*(MAX_CELL_DIM+2)],
			
 
				+                       lhsb[5*5*(MAX_CELL_DIM+2)],
			
 
				+                       tmp1, tmp2, tmp3;
			
 
				+#endif /*G_MAIN*/
			
 
				+#ifdef _OPENMP  // under OpenMP each thread gets its own copy of these objects
			
 
				+#pragma omp threadprivate (fjac, njac, lhsa, lhsb, tmp1, tmp2, tmp3)
			
 
				+#endif
			
 
				+#endif
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/x_solve.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/x_solve.c.svn-base
@@ -0,0 +1,632 @@
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include "header.h"
			
 
				+#include "mpinpb.h"
			
 
				+#define G_MAIN
			
 
				+#include "work_lhs.h"
			
 
				+#undef G_MAIN
			
 
				+// Prototypes of the four helpers called by x_solve(); all are defined later in this file.
			
 
				+extern void x_sendrecv_solve(int c, int cprev);
			
 
				+extern void x_sendrecv_back(int c, int cprev);
			
 
				+extern void x_backsubstitute(int first, int last, int c);
			
 
				+extern void x_solve_cell(int first, int last, int c);
			
 
				+
			
 
				+void x_solve() {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     Performs line solves in X direction by first factoring
			
 
				+//     the block-tridiagonal matrix into an upper triangular matrix,
			
 
				+//     and then performing back substitution to solve for the unknown
			
 
				+//     vectors of each line.
			
 
				+//
			
 
				+//     Make sure we treat elements zero to cell_size in the direction
			
 
				+//     of the sweep.
			
 
				+//---------------------------------------------------------------------
			
 
				+//     Takes no arguments and returns nothing; ncells and slice() come from outside this function.
			
 
				+      int  c, cprev, stage, first, last, error;  // NOTE(review): 'error' is never used here
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     in our terminology stage is the number of the cell in the x-direction,
			
 
				+//     i.e. stage = 1 means the start of the line, stage = ncells means the end
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (stage = 1; stage <= ncells; stage++) {
			
 
				+         c = slice(1,stage);
			
 
				+//---------------------------------------------------------------------
			
 
				+//     set first/last-cell flags
			
 
				+//---------------------------------------------------------------------
			
 
				+         first = (stage == 1);
			
 
				+         last =  (stage == ncells);
			
 
				+//     every stage but the first exchanges data first, with the previous stage's cell as cprev
			
 
				+        if (stage >1) {
			
 
				+           cprev = slice(1,stage-1);
			
 
				+           x_sendrecv_solve(c, cprev);
			
 
				+        }
			
 
				+        x_solve_cell(first,last,c);
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     now perform backsubstitution in reverse direction (stage = ncells down to 1)
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (stage = ncells; stage >= 1; stage--) {
			
 
				+         c = slice(1,stage);
			
 
				+         first = (stage == 1);
			
 
				+         last =  (stage == ncells);
			
 
				+//     every stage but the last exchanges data first, with the following stage's cell as cprev
			
 
				+         if (stage <ncells) {
			
 
				+            cprev = slice(1,stage+1);
			
 
				+            x_sendrecv_back(c, cprev);
			
 
				+         }
			
 
				+
			
 
				+         x_backsubstitute(first,last,c);
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+      
			
 
				+      
			
 
				+void x_sendrecv_solve(int c, int cprev) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     pack up C'(iend) and rhs'(iend) of cell cprev for all j and k,
			
 
				+//     send them on, and unpack what is received into i = -1 of cell c
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int j,k,m,n,isize,ptr, istart;
			
 
				+      int phase;
			
 
				+      int error, buffer_size;  // NOTE(review): 'error' is never used here
			
 
				+//     buffer_size is in doubles: a BLOCK_SIZE x BLOCK_SIZE block plus a BLOCK_SIZE vector per (j,k) pair
			
 
				+      isize = cell_size(1,cprev)-1;
			
 
				+      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
			
 
				+           (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE);
			
 
				+//     NOTE(review): size uses MAX_CELL_DIM, the loops below run over KMAX x JMAX - confirm the buffers fit both
			
 
				+//---------------------------------------------------------------------
			
 
				+//     pack up buffer: per (j,k) the lhsc block at i = isize, then the rhs vector
			
 
				+//---------------------------------------------------------------------
			
 
				+      ptr = 0;
			
 
				+      for (k = 0; k <= KMAX-1; k++) {
			
 
				+         for (j = 0; j <= JMAX-1; j++) {
			
 
				+            for (m = 1; m <= BLOCK_SIZE; m++) {
			
 
				+               for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+                  in_buffer(ptr+n) = lhsc(m,n,isize,j,k,cprev);
			
 
				+               }
			
 
				+               ptr = ptr+BLOCK_SIZE;
			
 
				+            }
			
 
				+            for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+               in_buffer(ptr+n) = rhs(n,isize,j,k,cprev);
			
 
				+            }
			
 
				+            ptr = ptr+BLOCK_SIZE;
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     send and receive buffer: send to successor(1), receive from predecessor(1)
			
 
				+//---------------------------------------------------------------------
			
 
				+//     three phases: send when send_color[EASTDIR] equals the phase, receive when recv_color[EASTDIR] does
			
 
				+      for (phase = 0; phase < 3; phase++) {
			
 
				+//     (presumably the colors stagger the send/recv calls so neighbours do not block each other - confirm)
			
 
				+        if (send_color[EASTDIR]==phase) 
			
 
				+          RCCE_send((char*)in_buffer, buffer_size*sizeof(double), successor(1));
			
 
				+        if (recv_color[EASTDIR]==phase) 
			
 
				+          RCCE_recv((char*)out_buffer, buffer_size*sizeof(double), predecessor(1));
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     unpack buffer (same layout as the packing above)
			
 
				+//---------------------------------------------------------------------
			
 
				+      istart = 0;  // the unpacked data is stored at i = istart-1, i.e. i = -1, of cell c
			
 
				+      ptr = 0;
			
 
				+      for (k = 0; k <= KMAX-1; k++) {
			
 
				+         for (j = 0; j <= JMAX-1; j++) {
			
 
				+            for (m = 1; m <= BLOCK_SIZE; m++) {
			
 
				+               for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+                  lhsc(m,n,istart-1,j,k,c) = out_buffer(ptr+n);
			
 
				+               }
			
 
				+               ptr = ptr+BLOCK_SIZE;
			
 
				+            }
			
 
				+            for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+               rhs(n,istart-1,j,k,c) = out_buffer(ptr+n);
			
 
				+            }
			
 
				+            ptr = ptr+BLOCK_SIZE;
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+void x_sendrecv_back(int c, int cprev) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     pack up and send U(istart) of cell cprev for all j and k;
			
 
				+//---------------------------------------------------------------------
			
 
				+//     the data received in exchange is stored in backsub_info of cell c.
			
 
				+      int j,k,n,ptr,istart,jp,kp;  // NOTE(review): jp and kp are never used here
			
 
				+      int phase;
			
 
				+      int error, buffer_size;  // NOTE(review): 'error' is never used here
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     Send element 0 (istart) to the previous processor
			
 
				+//---------------------------------------------------------------------
			
 
				+      istart = 0;
			
 
				+      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE;  // in doubles: BLOCK_SIZE values per (j,k) pair
			
 
				+      ptr = 0;
			
 
				+      for (k = 0; k <= KMAX-1; k++) {
			
 
				+         for (j = 0; j <= JMAX-1; j++) {
			
 
				+            for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+               in_buffer(ptr+n) = rhs(n,istart,j,k,cprev);
			
 
				+            }
			
 
				+            ptr = ptr+BLOCK_SIZE;
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     send and receive buffer: send to predecessor(1), receive from successor(1)
			
 
				+//---------------------------------------------------------------------
			
 
				+//     three phases: send when send_color[WESTDIR] equals the phase, receive when recv_color[WESTDIR] does
			
 
				+      for (phase = 0; phase < 3; phase++) {
			
 
				+
			
 
				+        if (send_color[WESTDIR]==phase) 
			
 
				+          RCCE_send((char*)in_buffer, buffer_size*sizeof(double), predecessor(1));
			
 
				+        if (recv_color[WESTDIR]==phase) 
			
 
				+          RCCE_recv((char*)out_buffer, buffer_size*sizeof(double), successor(1));
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     unpack the received U(isize) into backsub_info for all j and k
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      ptr = 0;
			
 
				+      for (k = 0; k <= KMAX-1; k++) {
			
 
				+         for (j = 0; j <= JMAX-1; j++) {
			
 
				+            for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+               backsub_info(n,j,k,c) = out_buffer(ptr+n);
			
 
				+            }
			
 
				+            ptr = ptr+BLOCK_SIZE;
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+      
			
 
				+void x_backsubstitute(int first, int last, int c) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     back solve: if last cell, U(isize) is simply rhs(isize);
			
 
				+//     otherwise U(isize) is first corrected with backsub_info, which
			
 
				+//     the unpack step of x_sendrecv_back has loaded for this cell.
			
 
				+//     after the call U(istart) will be sent to the next cell
			
 
				+//---------------------------------------------------------------------
			
 
				+//     'first' is not referenced in this routine; last == 0 selects the extra U(isize) update.
			
 
				+      int i, j, k;
			
 
				+      int m,n,isize,jsize,ksize,istart;
			
 
				+      
			
 
				+      istart = 0;
			
 
				+      isize = cell_size(1,c)-1;
			
 
				+      jsize = cell_size(2,c)-end(2,c)-1      ;
			
 
				+      ksize = cell_size(3,c)-end(3,c)-1;
			
 
				+      if (last == 0) {
			
 
				+         for (k = start(3,c); k <= ksize; k++) {
			
 
				+            for (j = start(2,c); j <= jsize; j++) {
			
 
				+//---------------------------------------------------------------------
			
 
				+//     U(isize) uses info from previous cell if not last cell
			
 
				+//---------------------------------------------------------------------
			
 
				+               for (m = 1; m <= BLOCK_SIZE; m++) {
			
 
				+                  for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+                     rhs(m,isize,j,k,c) = rhs(m,isize,j,k,c) 
			
 
				+                          - lhsc(m,n,isize,j,k,c)*
			
 
				+                          backsub_info(n,j,k,c);
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+      }
			
 
				+      for (k = start(3,c); k <= ksize; k++) {
			
 
				+         for (j = start(2,c); j <= jsize; j++) {
			
 
				+            for (i = isize-1; i >= istart; i--) {  // from isize-1 down to istart, using the value at i+1
			
 
				+               for (m = 1; m <= BLOCK_SIZE; m++) {
			
 
				+                  for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) 
			
 
				+                          - lhsc(m,n,i,j,k,c)*rhs(n,i+1,j,k,c);
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+void x_solve_cell(int first, int last, int c) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     performs Gaussian elimination on this cell.
			
 
				+//     
			
 
				+//     assumes that unpacking routines for non-first cells 
			
 
				+//     preload C' and rhs' from previous cell.
			
 
				+//     
			
 
				+//     assumed send happens outside this routine, but that
			
 
				+//     c'(IMAX) and rhs'(IMAX) will be sent to next cell
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int i,j,k,isize,ksize,jsize,istart;
			
 
				+
			
 
				+      istart = 0;
			
 
				+      isize = cell_size(1,c)-1;
			
 
				+      jsize = cell_size(2,c)-end(2,c)-1;
			
 
				+      ksize = cell_size(3,c)-end(3,c)-1;
			
 
				+
			
 
				+      lhsabinit(lhsa, lhsb, isize);
			
 
				+
			
 
				+      for (k = start(3,c); k <= ksize; k++) {
			
 
				+         for (j = start(2,c); j <= jsize; j++) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     This function computes the left hand side in the xi-direction
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     determine a (labeled f) and n jacobians for cell c
			
 
				+//---------------------------------------------------------------------
			
 
				+            for (i = start(1,c)-1; i <= cell_size(1,c) - end(1,c); i++) {
			
 
				+
			
 
				+               tmp1 = rho_i(i,j,k,c);
			
 
				+               tmp2 = tmp1 * tmp1;
			
 
				+               tmp3 = tmp1 * tmp2;
			
 
				+//---------------------------------------------------------------------
			
 
				+//     
			
 
				+//---------------------------------------------------------------------
			
 
				+               fjac(1,1,i) = 0.0e+00;
			
 
				+               fjac(1,2,i) = 1.0e+00;
			
 
				+               fjac(1,3,i) = 0.0e+00;
			
 
				+               fjac(1,4,i) = 0.0e+00;
			
 
				+               fjac(1,5,i) = 0.0e+00;
			
 
				+
			
 
				+               fjac(2,1,i) = -(u(2,i,j,k,c) * tmp2 * 
			
 
				+                    u(2,i,j,k,c))
			
 
				+                    + c2 * qs(i,j,k,c);
			
 
				+               fjac(2,2,i) = ( 2.0e+00 - c2 )
			
 
				+                    * ( u(2,i,j,k,c) * tmp1 );
			
 
				+               fjac(2,3,i) = - c2 * ( u(3,i,j,k,c) * tmp1 );
			
 
				+               fjac(2,4,i) = - c2 * ( u(4,i,j,k,c) * tmp1 );
			
 
				+               fjac(2,5,i) = c2;
			
 
				+
			
 
				+               fjac(3,1,i) = - ( u(2,i,j,k,c)*u(3,i,j,k,c) ) * tmp2;
			
 
				+               fjac(3,2,i) = u(3,i,j,k,c) * tmp1;
			
 
				+               fjac(3,3,i) = u(2,i,j,k,c) * tmp1;
			
 
				+               fjac(3,4,i) = 0.0e+00;
			
 
				+               fjac(3,5,i) = 0.0e+00;
			
 
				+
			
 
				+               fjac(4,1,i) = - ( u(2,i,j,k,c)*u(4,i,j,k,c) ) * tmp2;
			
 
				+               fjac(4,2,i) = u(4,i,j,k,c) * tmp1;
			
 
				+               fjac(4,3,i) = 0.0e+00;
			
 
				+               fjac(4,4,i) = u(2,i,j,k,c) * tmp1;
			
 
				+               fjac(4,5,i) = 0.0e+00;
			
 
				+
			
 
				+               fjac(5,1,i) = ( c2 * 2.0e0 * qs(i,j,k,c)
			
 
				+                    - c1 * ( u(5,i,j,k,c) * tmp1 ) )
			
 
				+                    * ( u(2,i,j,k,c) * tmp1 );
			
 
				+               fjac(5,2,i) = c1 *  u(5,i,j,k,c) * tmp1 
			
 
				+                    - c2
			
 
				+                    * ( u(2,i,j,k,c)*u(2,i,j,k,c) * tmp2
			
 
				+                    + qs(i,j,k,c) );
			
 
				+               fjac(5,3,i) = - c2 * ( u(3,i,j,k,c)*u(2,i,j,k,c) )
			
 
				+                    * tmp2;
			
 
				+               fjac(5,4,i) = - c2 * ( u(4,i,j,k,c)*u(2,i,j,k,c) )
			
 
				+                    * tmp2;
			
 
				+               fjac(5,5,i) = c1 * ( u(2,i,j,k,c) * tmp1 );
			
 
				+
			
 
				+               njac(1,1,i) = 0.0e+00;
			
 
				+               njac(1,2,i) = 0.0e+00;
			
 
				+               njac(1,3,i) = 0.0e+00;
			
 
				+               njac(1,4,i) = 0.0e+00;
			
 
				+               njac(1,5,i) = 0.0e+00;
			
 
				+
			
 
				+               njac(2,1,i) = - con43 * c3c4 * tmp2 * u(2,i,j,k,c);
			
 
				+               njac(2,2,i) =   con43 * c3c4 * tmp1;
			
 
				+               njac(2,3,i) =   0.0e+00;
			
 
				+               njac(2,4,i) =   0.0e+00;
			
 
				+               njac(2,5,i) =   0.0e+00;
			
 
				+
			
 
				+               njac(3,1,i) = - c3c4 * tmp2 * u(3,i,j,k,c);
			
 
				+               njac(3,2,i) =   0.0e+00;
			
 
				+               njac(3,3,i) =   c3c4 * tmp1;
			
 
				+               njac(3,4,i) =   0.0e+00;
			
 
				+               njac(3,5,i) =   0.0e+00;
			
 
				+
			
 
				+               njac(4,1,i) = - c3c4 * tmp2 * u(4,i,j,k,c);
			
 
				+               njac(4,2,i) =   0.0e+00 ;
			
 
				+               njac(4,3,i) =   0.0e+00;
			
 
				+               njac(4,4,i) =   c3c4 * tmp1;
			
 
				+               njac(4,5,i) =   0.0e+00;
			
 
				+
			
 
				+               njac(5,1,i) = - ( con43 * c3c4
			
 
				+                    - c1345 ) * tmp3 * SQR(u(2,i,j,k,c))
			
 
				+                    - ( c3c4 - c1345 ) * tmp3 * SQR(u(3,i,j,k,c))
			
 
				+                    - ( c3c4 - c1345 ) * tmp3 * SQR(u(4,i,j,k,c))
			
 
				+                    - c1345 * tmp2 * u(5,i,j,k,c);
			
 
				+
			
 
				+               njac(5,2,i) = ( con43 * c3c4
			
 
				+                    - c1345 ) * tmp2 * u(2,i,j,k,c);
			
 
				+               njac(5,3,i) = ( c3c4 - c1345 ) * tmp2 * u(3,i,j,k,c);
			
 
				+               njac(5,4,i) = ( c3c4 - c1345 ) * tmp2 * u(4,i,j,k,c);
			
 
				+               njac(5,5,i) = ( c1345 ) * tmp1;
			
 
				+
			
 
				+            }
			
 
				+//---------------------------------------------------------------------
			
 
				+//     now jacobians set, so form left hand side in x direction
			
 
				+//---------------------------------------------------------------------
			
 
				+            for (i = start(1,c); i <= isize - end(1,c); i++) {
			
 
				+
			
 
				+               tmp1 = dt * tx1;
			
 
				+               tmp2 = dt * tx2;
			
 
				+
			
 
				+               lhsa(1,1,i) = - tmp2 * fjac(1,1,i-1)
			
 
				+                    - tmp1 * njac(1,1,i-1)
			
 
				+                    - tmp1 * dx1 ;
			
 
				+               lhsa(1,2,i) = - tmp2 * fjac(1,2,i-1)
			
 
				+                    - tmp1 * njac(1,2,i-1);
			
 
				+               lhsa(1,3,i) = - tmp2 * fjac(1,3,i-1)
			
 
				+                    - tmp1 * njac(1,3,i-1);
			
 
				+               lhsa(1,4,i) = - tmp2 * fjac(1,4,i-1)
			
 
				+                    - tmp1 * njac(1,4,i-1);
			
 
				+               lhsa(1,5,i) = - tmp2 * fjac(1,5,i-1)
			
 
				+                    - tmp1 * njac(1,5,i-1);
			
 
				+
			
 
				+               lhsa(2,1,i) = - tmp2 * fjac(2,1,i-1)
			
 
				+                    - tmp1 * njac(2,1,i-1);
			
 
				+               lhsa(2,2,i) = - tmp2 * fjac(2,2,i-1)
			
 
				+                    - tmp1 * njac(2,2,i-1)
			
 
				+                    - tmp1 * dx2;
			
 
				+               lhsa(2,3,i) = - tmp2 * fjac(2,3,i-1)
			
 
				+                    - tmp1 * njac(2,3,i-1);
			
 
				+               lhsa(2,4,i) = - tmp2 * fjac(2,4,i-1)
			
 
				+                    - tmp1 * njac(2,4,i-1);
			
 
				+               lhsa(2,5,i) = - tmp2 * fjac(2,5,i-1)
			
 
				+                    - tmp1 * njac(2,5,i-1);
			
 
				+
			
 
				+               lhsa(3,1,i) = - tmp2 * fjac(3,1,i-1)
			
 
				+                    - tmp1 * njac(3,1,i-1);
			
 
				+               lhsa(3,2,i) = - tmp2 * fjac(3,2,i-1)
			
 
				+                    - tmp1 * njac(3,2,i-1);
			
 
				+               lhsa(3,3,i) = - tmp2 * fjac(3,3,i-1)
			
 
				+                    - tmp1 * njac(3,3,i-1)
			
 
				+                    - tmp1 * dx3 ;
			
 
				+               lhsa(3,4,i) = - tmp2 * fjac(3,4,i-1)
			
 
				+                    - tmp1 * njac(3,4,i-1);
			
 
				+               lhsa(3,5,i) = - tmp2 * fjac(3,5,i-1)
			
 
				+                    - tmp1 * njac(3,5,i-1);
			
 
				+
			
 
				+               lhsa(4,1,i) = - tmp2 * fjac(4,1,i-1)
			
 
				+                    - tmp1 * njac(4,1,i-1);
			
 
				+               lhsa(4,2,i) = - tmp2 * fjac(4,2,i-1)
			
 
				+                    - tmp1 * njac(4,2,i-1);
			
 
				+               lhsa(4,3,i) = - tmp2 * fjac(4,3,i-1)
			
 
				+                    - tmp1 * njac(4,3,i-1);
			
 
				+               lhsa(4,4,i) = - tmp2 * fjac(4,4,i-1)
			
 
				+                    - tmp1 * njac(4,4,i-1)
			
 
				+                    - tmp1 * dx4;
			
 
				+               lhsa(4,5,i) = - tmp2 * fjac(4,5,i-1)
			
 
				+                    - tmp1 * njac(4,5,i-1);
			
 
				+
			
 
				+               lhsa(5,1,i) = - tmp2 * fjac(5,1,i-1)
			
 
				+                    - tmp1 * njac(5,1,i-1);
			
 
				+               lhsa(5,2,i) = - tmp2 * fjac(5,2,i-1)
			
 
				+                    - tmp1 * njac(5,2,i-1);
			
 
				+               lhsa(5,3,i) = - tmp2 * fjac(5,3,i-1)
			
 
				+                    - tmp1 * njac(5,3,i-1);
			
 
				+               lhsa(5,4,i) = - tmp2 * fjac(5,4,i-1)
			
 
				+                    - tmp1 * njac(5,4,i-1);
			
 
				+               lhsa(5,5,i) = - tmp2 * fjac(5,5,i-1)
			
 
				+                    - tmp1 * njac(5,5,i-1)
			
 
				+                    - tmp1 * dx5;
			
 
				+
			
 
				+               lhsb(1,1,i) = 1.0e+00
			
 
				+                    + tmp1 * 2.0e+00 * njac(1,1,i)
			
 
				+                    + tmp1 * 2.0e+00 * dx1;
			
 
				+               lhsb(1,2,i) = tmp1 * 2.0e+00 * njac(1,2,i);
			
 
				+               lhsb(1,3,i) = tmp1 * 2.0e+00 * njac(1,3,i);
			
 
				+               lhsb(1,4,i) = tmp1 * 2.0e+00 * njac(1,4,i);
			
 
				+               lhsb(1,5,i) = tmp1 * 2.0e+00 * njac(1,5,i);
			
 
				+
			
 
				+               lhsb(2,1,i) = tmp1 * 2.0e+00 * njac(2,1,i);
			
 
				+               lhsb(2,2,i) = 1.0e+00
			
 
				+                    + tmp1 * 2.0e+00 * njac(2,2,i)
			
 
				+                    + tmp1 * 2.0e+00 * dx2;
			
 
				+               lhsb(2,3,i) = tmp1 * 2.0e+00 * njac(2,3,i);
			
 
				+               lhsb(2,4,i) = tmp1 * 2.0e+00 * njac(2,4,i);
			
 
				+               lhsb(2,5,i) = tmp1 * 2.0e+00 * njac(2,5,i);
			
 
				+
			
 
				+               lhsb(3,1,i) = tmp1 * 2.0e+00 * njac(3,1,i);
			
 
				+               lhsb(3,2,i) = tmp1 * 2.0e+00 * njac(3,2,i);
			
 
				+               lhsb(3,3,i) = 1.0e+00
			
 
				+                    + tmp1 * 2.0e+00 * njac(3,3,i)
			
 
				+                    + tmp1 * 2.0e+00 * dx3;
			
 
				+               lhsb(3,4,i) = tmp1 * 2.0e+00 * njac(3,4,i);
			
 
				+               lhsb(3,5,i) = tmp1 * 2.0e+00 * njac(3,5,i);
			
 
				+
			
 
				+               lhsb(4,1,i) = tmp1 * 2.0e+00 * njac(4,1,i);
			
 
				+               lhsb(4,2,i) = tmp1 * 2.0e+00 * njac(4,2,i);
			
 
				+               lhsb(4,3,i) = tmp1 * 2.0e+00 * njac(4,3,i);
			
 
				+               lhsb(4,4,i) = 1.0e+00
			
 
				+                    + tmp1 * 2.0e+00 * njac(4,4,i)
			
 
				+                    + tmp1 * 2.0e+00 * dx4;
			
 
				+               lhsb(4,5,i) = tmp1 * 2.0e+00 * njac(4,5,i);
			
 
				+
			
 
				+               lhsb(5,1,i) = tmp1 * 2.0e+00 * njac(5,1,i);
			
 
				+               lhsb(5,2,i) = tmp1 * 2.0e+00 * njac(5,2,i);
			
 
				+               lhsb(5,3,i) = tmp1 * 2.0e+00 * njac(5,3,i);
			
 
				+               lhsb(5,4,i) = tmp1 * 2.0e+00 * njac(5,4,i);
			
 
				+               lhsb(5,5,i) = 1.0e+00
			
 
				+                    + tmp1 * 2.0e+00 * njac(5,5,i)
			
 
				+                    + tmp1 * 2.0e+00 * dx5;
			
 
				+
			
 
				+               lhsc(1,1,i,j,k,c) =  tmp2 * fjac(1,1,i+1)
			
 
				+                    - tmp1 * njac(1,1,i+1)
			
 
				+                    - tmp1 * dx1;
			
 
				+               lhsc(1,2,i,j,k,c) =  tmp2 * fjac(1,2,i+1)
			
 
				+                    - tmp1 * njac(1,2,i+1);
			
 
				+               lhsc(1,3,i,j,k,c) =  tmp2 * fjac(1,3,i+1)
			
 
				+                    - tmp1 * njac(1,3,i+1);
			
 
				+               lhsc(1,4,i,j,k,c) =  tmp2 * fjac(1,4,i+1)
			
 
				+                    - tmp1 * njac(1,4,i+1);
			
 
				+               lhsc(1,5,i,j,k,c) =  tmp2 * fjac(1,5,i+1)
			
 
				+                    - tmp1 * njac(1,5,i+1);
			
 
				+
			
 
				+               lhsc(2,1,i,j,k,c) =  tmp2 * fjac(2,1,i+1)
			
 
				+                    - tmp1 * njac(2,1,i+1);
			
 
				+               lhsc(2,2,i,j,k,c) =  tmp2 * fjac(2,2,i+1)
			
 
				+                    - tmp1 * njac(2,2,i+1)
			
 
				+                    - tmp1 * dx2;
			
 
				+               lhsc(2,3,i,j,k,c) =  tmp2 * fjac(2,3,i+1)
			
 
				+                    - tmp1 * njac(2,3,i+1);
			
 
				+               lhsc(2,4,i,j,k,c) =  tmp2 * fjac(2,4,i+1)
			
 
				+                    - tmp1 * njac(2,4,i+1);
			
 
				+               lhsc(2,5,i,j,k,c) =  tmp2 * fjac(2,5,i+1)
			
 
				+                    - tmp1 * njac(2,5,i+1);
			
 
				+
			
 
				+               lhsc(3,1,i,j,k,c) =  tmp2 * fjac(3,1,i+1)
			
 
				+                    - tmp1 * njac(3,1,i+1);
			
 
				+               lhsc(3,2,i,j,k,c) =  tmp2 * fjac(3,2,i+1)
			
 
				+                    - tmp1 * njac(3,2,i+1);
			
 
				+               lhsc(3,3,i,j,k,c) =  tmp2 * fjac(3,3,i+1)
			
 
				+                    - tmp1 * njac(3,3,i+1)
			
 
				+                    - tmp1 * dx3;
			
 
				+               lhsc(3,4,i,j,k,c) =  tmp2 * fjac(3,4,i+1)
			
 
				+                    - tmp1 * njac(3,4,i+1);
			
 
				+               lhsc(3,5,i,j,k,c) =  tmp2 * fjac(3,5,i+1)
			
 
				+                    - tmp1 * njac(3,5,i+1);
			
 
				+
			
 
				+               lhsc(4,1,i,j,k,c) =  tmp2 * fjac(4,1,i+1)
			
 
				+                    - tmp1 * njac(4,1,i+1);
			
 
				+               lhsc(4,2,i,j,k,c) =  tmp2 * fjac(4,2,i+1)
			
 
				+                    - tmp1 * njac(4,2,i+1);
			
 
				+               lhsc(4,3,i,j,k,c) =  tmp2 * fjac(4,3,i+1)
			
 
				+                    - tmp1 * njac(4,3,i+1);
			
 
				+               lhsc(4,4,i,j,k,c) =  tmp2 * fjac(4,4,i+1)
			
 
				+                    - tmp1 * njac(4,4,i+1)
			
 
				+                    - tmp1 * dx4;
			
 
				+               lhsc(4,5,i,j,k,c) =  tmp2 * fjac(4,5,i+1)
			
 
				+                    - tmp1 * njac(4,5,i+1);
			
 
				+
			
 
				+               lhsc(5,1,i,j,k,c) =  tmp2 * fjac(5,1,i+1)
			
 
				+                    - tmp1 * njac(5,1,i+1);
			
 
				+               lhsc(5,2,i,j,k,c) =  tmp2 * fjac(5,2,i+1)
			
 
				+                    - tmp1 * njac(5,2,i+1);
			
 
				+               lhsc(5,3,i,j,k,c) =  tmp2 * fjac(5,3,i+1)
			
 
				+                    - tmp1 * njac(5,3,i+1);
			
 
				+               lhsc(5,4,i,j,k,c) =  tmp2 * fjac(5,4,i+1)
			
 
				+                    - tmp1 * njac(5,4,i+1);
			
 
				+               lhsc(5,5,i,j,k,c) =  tmp2 * fjac(5,5,i+1)
			
 
				+                    - tmp1 * njac(5,5,i+1)
			
 
				+                    - tmp1 * dx5;
			
 
				+
			
 
				+            }
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     outer most do loops - sweeping in i direction
			
 
				+//---------------------------------------------------------------------
			
 
				+            if (first == 1) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     multiply c(istart,j,k) by b_inverse and copy back to c
			
 
				+//     multiply rhs(istart) by b_inverse(istart) and copy to rhs
			
 
				+//---------------------------------------------------------------------
			
 
				+               binvcrhs( &lhsb(1,1,istart),
			
 
				+                              &lhsc(1,1,istart,j,k,c),
			
 
				+                              &rhs(1,istart,j,k,c) );
			
 
				+
			
 
				+            }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     begin inner most do loop
			
 
				+//     do all the elements of the cell unless last 
			
 
				+//---------------------------------------------------------------------
			
 
				+            for (i = istart+first; i <= isize-last; i++) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     rhs(i) = rhs(i) - A*rhs(i-1)
			
 
				+//---------------------------------------------------------------------
			
 
				+               matvec_sub(&lhsa(1,1,i),
			
 
				+                               &rhs(1,i-1,j,k,c),&rhs(1,i,j,k,c));
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     B(i) = B(i) - C(i-1)*A(i)
			
 
				+//---------------------------------------------------------------------
			
 
				+               matmul_sub(&lhsa(1,1,i),
			
 
				+                               &lhsc(1,1,i-1,j,k,c),
			
 
				+                               &lhsb(1,1,i));
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     multiply c(i,j,k) by b_inverse and copy back to c
			
 
				+//     multiply rhs(1,j,k) by b_inverse(1,j,k) and copy to rhs
			
 
				+//---------------------------------------------------------------------
			
 
				+               binvcrhs( &lhsb(1,1,i),
			
 
				+                              &lhsc(1,1,i,j,k,c),
			
 
				+                              &rhs(1,i,j,k,c) );
			
 
				+
			
 
				+            }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     Now finish up special cases for last cell
			
 
				+//---------------------------------------------------------------------
			
 
				+            if (last == 1) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     rhs(isize) = rhs(isize) - A*rhs(isize-1)
			
 
				+//---------------------------------------------------------------------
			
 
				+               matvec_sub(&lhsa(1,1,isize),
			
 
				+                               &rhs(1,isize-1,j,k,c),&rhs(1,isize,j,k,c));
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     B(isize) = B(isize) - C(isize-1)*A(isize)
			
 
				+//---------------------------------------------------------------------
			
 
				+               matmul_sub(&lhsa(1,1,isize),
			
 
				+                               &lhsc(1,1,isize-1,j,k,c),
			
 
				+                               &lhsb(1,1,isize));
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     multiply rhs() by b_inverse() and copy to rhs
			
 
				+//---------------------------------------------------------------------
			
 
				+               binvrhs( &lhsb(1,1,isize),
			
 
				+                             &rhs(1,isize,j,k,c) );
			
 
				+
			
 
				+            }
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+      
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/y_solve.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/y_solve.c.svn-base
@@ -0,0 +1,646 @@
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include "header.h"
			
 
				+#include "mpinpb.h"
			
 
				+#include "work_lhs.h"
			
 
				+
			
 
				+extern void y_sendrecv_solve(int c, int cprev);
			
 
				+extern void y_sendrecv_back(int c, int cprev);
			
 
				+extern void y_backsubstitute(int first, int last, int c);
			
 
				+extern void y_solve_cell(int first, int last, int c);
			
 
				+
			
 
				+void y_solve() {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     Performs line solves in Y direction by first factoring
			
 
				+//     the block-tridiagonal matrix into an upper triangular matrix,
			
 
				+//     and then performing back substitution to solve for the unknown
			
 
				+//     vectors of each line.
			
 
				+//     
			
 
				+//     Make sure we treat elements zero to cell_size in the direction
			
 
				+//     of the sweep.
			
 
				+//---------------------------------------------------------------------
			
 
				+//     NOTE: 'error' is declared below but is never used in this routine.
			
 
				+      int  c, cprev, stage, first, last, error;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     in our terminology stage is the number of the cell in the y-direction
			
 
				+//     i.e. stage = 1 means the start of the line, stage = ncells means the end
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (stage = 1; stage <= ncells; stage++) {
			
 
				+         c = slice(2,stage);
			
 
				+//---------------------------------------------------------------------
			
 
				+//     set first-cell and last-cell flags
			
 
				+//---------------------------------------------------------------------
			
 
				+         first = (stage == 1);
			
 
				+         last =  (stage == ncells);
			
 
				+//     stages after the first call y_sendrecv_solve with the previous stage's cell
			
 
				+        if (stage >1) {
			
 
				+           cprev = slice(2,stage-1);
			
 
				+           y_sendrecv_solve(c, cprev);
			
 
				+        }
			
 
				+        y_solve_cell(first,last,c);
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     now perform backsubstitution in reverse direction
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (stage = ncells; stage >= 1; stage--) {
			
 
				+         c = slice(2,stage);
			
 
				+         first = (stage == 1);
			
 
				+         last =  (stage == ncells);
			
 
				+//     stages before the last call y_sendrecv_back with the next stage's cell
			
 
				+         if (stage <ncells) {
			
 
				+            cprev = slice(2,stage+1);
			
 
				+            y_sendrecv_back(c, cprev);
			
 
				+         }
			
 
				+
			
 
				+         y_backsubstitute(first,last,c);
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+      
			
 
				+      
			
 
				+void y_sendrecv_solve(int c, int cprev) {
			
 
				+//     c is the cell unpacked into, cprev is the cell packed from
			
 
				+//---------------------------------------------------------------------
			
 
				+//     pack up and send C'(jend) and rhs'(jend) of cell cprev for
			
 
				+//     all i and k, then unpack what was received into j = jstart-1 of cell c
			
 
				+//---------------------------------------------------------------------
			
 
				+//     NOTE: 'error' is declared below but is never used in this routine.
			
 
				+      int i,k,m,n,jsize,ptr,jstart;
			
 
				+      int phase;
			
 
				+      int error,buffer_size;
			
 
				+
			
 
				+      jsize = cell_size(2,cprev)-1;
			
 
				+      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
			
 
				+           (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE);
			
 
				+//     buffer_size counts doubles; it is scaled by sizeof(double) at send/recv
			
 
				+//---------------------------------------------------------------------
			
 
				+//     pack up buffer: for each (k,i), the lhsc block followed by rhs
			
 
				+//---------------------------------------------------------------------
			
 
				+      ptr = 0;
			
 
				+      for (k = 0; k <= KMAX-1; k++) {
			
 
				+         for (i = 0; i <= IMAX-1; i++) {
			
 
				+            for (m = 1; m <= BLOCK_SIZE; m++) {
			
 
				+               for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+                  in_buffer(ptr+n) = lhsc(m,n,i,jsize,k,cprev);
			
 
				+               }
			
 
				+               ptr = ptr+BLOCK_SIZE;
			
 
				+            }
			
 
				+            for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+               in_buffer(ptr+n) = rhs(n,i,jsize,k,cprev);
			
 
				+            }
			
 
				+            ptr = ptr+BLOCK_SIZE;
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     send and receive buffer
			
 
				+//---------------------------------------------------------------------
			
 
				+//     phase loop: send/recv fire when the NORTHDIR color entry equals the phase
			
 
				+      for (phase = 0; phase < 3; phase++) {
			
 
				+
			
 
				+        if (send_color[NORTHDIR]==phase) 
			
 
				+          RCCE_send((char*)in_buffer, buffer_size*sizeof(double), successor(2));
			
 
				+        if (recv_color[NORTHDIR]==phase) 
			
 
				+          RCCE_recv((char*)out_buffer, buffer_size*sizeof(double), predecessor(2));
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     unpack buffer into j = jstart-1 of cell c (same order as packing)
			
 
				+//---------------------------------------------------------------------
			
 
				+      jstart = 0;
			
 
				+      ptr = 0;
			
 
				+      for (k = 0; k <= KMAX-1; k++) {
			
 
				+         for (i = 0; i <= IMAX-1; i++) {
			
 
				+            for (m = 1; m <= BLOCK_SIZE; m++) {
			
 
				+               for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+                  lhsc(m,n,i,jstart-1,k,c) = out_buffer(ptr+n);
			
 
				+               }
			
 
				+               ptr = ptr+BLOCK_SIZE;
			
 
				+            }
			
 
				+            for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+               rhs(n,i,jstart-1,k,c) = out_buffer(ptr+n);
			
 
				+            }
			
 
				+            ptr = ptr+BLOCK_SIZE;
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+void y_sendrecv_back(int c, int cprev) {
			
 
				+//     c is the cell whose backsub_info is filled, cprev is the cell packed from
			
 
				+//---------------------------------------------------------------------
			
 
				+//     pack up and send U(jstart) of cell cprev for all i and k
			
 
				+//---------------------------------------------------------------------
			
 
				+//     NOTE: 'error' is declared below but is never used in this routine.
			
 
				+      int i,k,n,ptr,jstart;
			
 
				+      int phase;
			
 
				+      int error,buffer_size;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     Pack rhs at j = jstart (element 0); it is sent to predecessor(2) below
			
 
				+//---------------------------------------------------------------------
			
 
				+      jstart = 0;
			
 
				+      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE;
			
 
				+      ptr = 0;
			
 
				+      for (k = 0; k <= KMAX-1; k++) {
			
 
				+         for (i = 0; i <= IMAX-1; i++) {
			
 
				+            for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+               in_buffer(ptr+n) = rhs(n,i,jstart,k,cprev);
			
 
				+            }
			
 
				+            ptr = ptr+BLOCK_SIZE;
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     send and receive buffer
			
 
				+//---------------------------------------------------------------------
			
 
				+//     phase loop: send/recv fire when the SOUTHDIR color entry equals the phase
			
 
				+      for (phase = 0; phase < 3; phase++) {
			
 
				+
			
 
				+        if (send_color[SOUTHDIR]==phase) 
			
 
				+          RCCE_send((char*)in_buffer, buffer_size*sizeof(double), predecessor(2));
			
 
				+        if (recv_color[SOUTHDIR]==phase) 
			
 
				+          RCCE_recv((char*)out_buffer, buffer_size*sizeof(double), successor(2));
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     unpack U(jsize) into backsub_info of cell c for all i and k
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      ptr = 0;
			
 
				+      for (k = 0; k <= KMAX-1; k++) {
			
 
				+         for (i = 0; i <= IMAX-1; i++) {
			
 
				+            for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+               backsub_info(n,i,k,c) = out_buffer(ptr+n);
			
 
				+            }
			
 
				+            ptr = ptr+BLOCK_SIZE;
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+void y_backsubstitute(int first, int last, int c) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     back solve: if last cell, then generate U(jsize)=rhs(jsize)
			
 
				+//     else assume U(jsize) was loaded into backsub_info by the unpack step
			
 
				+//     so just use it
			
 
				+//     after call u(jstart) will be sent to next cell
			
 
				+//---------------------------------------------------------------------
			
 
				+//     NOTE: the 'first' argument is not used in this routine.
			
 
				+      int i, k;
			
 
				+      int m,n,j,jsize,isize,ksize,jstart;
			
 
				+      
			
 
				+      jstart = 0;
			
 
				+      isize = cell_size(1,c)-end(1,c)-1      ;
			
 
				+      jsize = cell_size(2,c)-1;
			
 
				+      ksize = cell_size(3,c)-end(3,c)-1;
			
 
				+      if (last == 0) {
			
 
				+         for (k = start(3,c); k <= ksize; k++) {
			
 
				+            for (i = start(1,c); i <= isize; i++) {
			
 
				+//---------------------------------------------------------------------
			
 
				+//     not the last cell: rhs(jsize) = rhs(jsize) - lhsc(jsize)*backsub_info
			
 
				+//---------------------------------------------------------------------
			
 
				+               for (m = 1; m <= BLOCK_SIZE; m++) {
			
 
				+                  for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+                     rhs(m,i,jsize,k,c) = rhs(m,i,jsize,k,c) 
			
 
				+                          - lhsc(m,n,i,jsize,k,c)*
			
 
				+                          backsub_info(n,i,k,c);
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+      }
			
 
				+      for (k = start(3,c); k <= ksize; k++) {
			
 
				+         for (j = jsize-1; j >= jstart; j--) {
			
 
				+            for (i = start(1,c); i <= isize; i++) {
			
 
				+               for (m = 1; m <= BLOCK_SIZE; m++) {
			
 
				+                  for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) 
			
 
				+                          - lhsc(m,n,i,j,k,c)*rhs(n,i,j+1,k,c);
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+void y_solve_cell(int first,int last,int c) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     performs guaussian elimination on this cell.
			
 
				+//     
			
 
				+//     assumes that unpacking routines for non-first cells 
			
 
				+//     preload C' and rhs' from previous cell.
			
 
				+//     
			
 
				+//     assumed send happens outside this routine, but that
			
 
				+//     c'(JMAX) and rhs'(JMAX) will be sent to next cell
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int i,j,k,isize,ksize,jsize,jstart;
			
 
				+      double utmp[6*(JMAX+4)];
			
 
				+#define utmp(m,i) utmp[(m-1)+6*(i+2)]
			
 
				+
			
 
				+      jstart = 0;
			
 
				+      isize = cell_size(1,c)-end(1,c)-1;
			
 
				+      jsize = cell_size(2,c)-1;
			
 
				+      ksize = cell_size(3,c)-end(3,c)-1;
			
 
				+
			
 
				+      lhsabinit(lhsa, lhsb, jsize);
			
 
				+
			
 
				+      for (k = start(3,c); k <= ksize; k++) {
			
 
				+         for (i = start(1,c); i <= isize; i++) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     This function computes the left hand side for the three y-factors 
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     Compute the indices for storing the tri-diagonal matrix;
			
 
				+//     determine a (labeled f) and n jacobians for cell c
			
 
				+//---------------------------------------------------------------------
			
 
				+            for (j = start(2,c)-1; j <= cell_size(2,c)-end(2,c); j++) {
			
 
				+               utmp(1,j) = 1.0e0 / u(1,i,j,k,c);
			
 
				+               utmp(2,j) = u(2,i,j,k,c);
			
 
				+               utmp(3,j) = u(3,i,j,k,c);
			
 
				+               utmp(4,j) = u(4,i,j,k,c);
			
 
				+               utmp(5,j) = u(5,i,j,k,c);
			
 
				+               utmp(6,j) = qs(i,j,k,c);
			
 
				+            }
			
 
				+
			
 
				+            for (j = start(2,c)-1; j <= cell_size(2,c)-end(2,c); j++) {
			
 
				+
			
 
				+               tmp1 = utmp(1,j);
			
 
				+               tmp2 = tmp1 * tmp1;
			
 
				+               tmp3 = tmp1 * tmp2;
			
 
				+
			
 
				+               fjac(1,1,j) = 0.0e+00;
			
 
				+               fjac(1,2,j) = 0.0e+00;
			
 
				+               fjac(1,3,j) = 1.0e+00;
			
 
				+               fjac(1,4,j) = 0.0e+00;
			
 
				+               fjac(1,5,j) = 0.0e+00;
			
 
				+
			
 
				+               fjac(2,1,j) = - ( utmp(2,j)*utmp(3,j) )
			
 
				+                    * tmp2;
			
 
				+               fjac(2,2,j) = utmp(3,j) * tmp1;
			
 
				+               fjac(2,3,j) = utmp(2,j) * tmp1;
			
 
				+               fjac(2,4,j) = 0.0e+00;
			
 
				+               fjac(2,5,j) = 0.0e+00;
			
 
				+
			
 
				+               fjac(3,1,j) = - ( utmp(3,j)*utmp(3,j)*tmp2)
			
 
				+                    + c2 * utmp(6,j);
			
 
				+               fjac(3,2,j) = - c2 *  utmp(2,j) * tmp1;
			
 
				+               fjac(3,3,j) = ( 2.0e+00 - c2 )
			
 
				+                    *  utmp(3,j) * tmp1 ;
			
 
				+               fjac(3,4,j) = - c2 * utmp(4,j) * tmp1 ;
			
 
				+               fjac(3,5,j) = c2;
			
 
				+
			
 
				+               fjac(4,1,j) = - ( utmp(3,j)*utmp(4,j) )
			
 
				+                    * tmp2;
			
 
				+               fjac(4,2,j) = 0.0e+00;
			
 
				+               fjac(4,3,j) = utmp(4,j) * tmp1;
			
 
				+               fjac(4,4,j) = utmp(3,j) * tmp1;
			
 
				+               fjac(4,5,j) = 0.0e+00;
			
 
				+
			
 
				+               fjac(5,1,j) = ( c2 * 2.0e0 * utmp(6,j)
			
 
				+                    - c1 * utmp(5,j) * tmp1 ) 
			
 
				+                    * utmp(3,j) * tmp1 ;
			
 
				+               fjac(5,2,j) = - c2 * utmp(2,j)*utmp(3,j) 
			
 
				+                    * tmp2;
			
 
				+               fjac(5,3,j) = c1 * utmp(5,j) * tmp1 
			
 
				+                    - c2 * ( utmp(6,j)
			
 
				+                    + utmp(3,j)*utmp(3,j) * tmp2 );
			
 
				+               fjac(5,4,j) = - c2 * ( utmp(3,j)*utmp(4,j) )
			
 
				+                    * tmp2;
			
 
				+               fjac(5,5,j) = c1 * utmp(3,j) * tmp1 ;
			
 
				+
			
 
				+               njac(1,1,j) = 0.0e+00;
			
 
				+               njac(1,2,j) = 0.0e+00;
			
 
				+               njac(1,3,j) = 0.0e+00;
			
 
				+               njac(1,4,j) = 0.0e+00;
			
 
				+               njac(1,5,j) = 0.0e+00;
			
 
				+
			
 
				+               njac(2,1,j) = - c3c4 * tmp2 * utmp(2,j);
			
 
				+               njac(2,2,j) =   c3c4 * tmp1;
			
 
				+               njac(2,3,j) =   0.0e+00;
			
 
				+               njac(2,4,j) =   0.0e+00;
			
 
				+               njac(2,5,j) =   0.0e+00;
			
 
				+
			
 
				+               njac(3,1,j) = - con43 * c3c4 * tmp2 * utmp(3,j);
			
 
				+               njac(3,2,j) =   0.0e+00;
			
 
				+               njac(3,3,j) =   con43 * c3c4 * tmp1;
			
 
				+               njac(3,4,j) =   0.0e+00;
			
 
				+               njac(3,5,j) =   0.0e+00;
			
 
				+
			
 
				+               njac(4,1,j) = - c3c4 * tmp2 * utmp(4,j);
			
 
				+               njac(4,2,j) =   0.0e+00;
			
 
				+               njac(4,3,j) =   0.0e+00;
			
 
				+               njac(4,4,j) =   c3c4 * tmp1;
			
 
				+               njac(4,5,j) =   0.0e+00;
			
 
				+
			
 
				+               njac(5,1,j) = - (  c3c4
			
 
				+                    - c1345 ) * tmp3 * SQR(utmp(2,j))
			
 
				+                    - ( con43 * c3c4
			
 
				+                    - c1345 ) * tmp3 * SQR(utmp(3,j))
			
 
				+                    - ( c3c4 - c1345 ) * tmp3 * SQR(utmp(4,j))
			
 
				+                    - c1345 * tmp2 * utmp(5,j);
			
 
				+
			
 
				+               njac(5,2,j) = (  c3c4 - c1345 ) * tmp2 * utmp(2,j);
			
 
				+               njac(5,3,j) = ( con43 * c3c4
			
 
				+                    - c1345 ) * tmp2 * utmp(3,j);
			
 
				+               njac(5,4,j) = ( c3c4 - c1345 ) * tmp2 * utmp(4,j);
			
 
				+               njac(5,5,j) = ( c1345 ) * tmp1;
			
 
				+
			
 
				+            }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     now joacobians set, so form left hand side in y direction
			
 
				+//---------------------------------------------------------------------
			
 
				+            for (j = start(2,c); j <= jsize-end(2,c); j++) {
			
 
				+
			
 
				+               tmp1 = dt * ty1;
			
 
				+               tmp2 = dt * ty2;
			
 
				+
			
 
				+               lhsa(1,1,j) = - tmp2 * fjac(1,1,j-1)
			
 
				+                    - tmp1 * njac(1,1,j-1)
			
 
				+                    - tmp1 * dy1 ;
			
 
				+               lhsa(1,2,j) = - tmp2 * fjac(1,2,j-1)
			
 
				+                    - tmp1 * njac(1,2,j-1);
			
 
				+               lhsa(1,3,j) = - tmp2 * fjac(1,3,j-1)
			
 
				+                    - tmp1 * njac(1,3,j-1);
			
 
				+               lhsa(1,4,j) = - tmp2 * fjac(1,4,j-1)
			
 
				+                    - tmp1 * njac(1,4,j-1);
			
 
				+               lhsa(1,5,j) = - tmp2 * fjac(1,5,j-1)
			
 
				+                    - tmp1 * njac(1,5,j-1);
			
 
				+
			
 
				+               lhsa(2,1,j) = - tmp2 * fjac(2,1,j-1)
			
 
				+                    - tmp1 * njac(2,1,j-1);
			
 
				+               lhsa(2,2,j) = - tmp2 * fjac(2,2,j-1)
			
 
				+                    - tmp1 * njac(2,2,j-1)
			
 
				+                    - tmp1 * dy2;
			
 
				+               lhsa(2,3,j) = - tmp2 * fjac(2,3,j-1)
			
 
				+                    - tmp1 * njac(2,3,j-1);
			
 
				+               lhsa(2,4,j) = - tmp2 * fjac(2,4,j-1)
			
 
				+                    - tmp1 * njac(2,4,j-1);
			
 
				+               lhsa(2,5,j) = - tmp2 * fjac(2,5,j-1)
			
 
				+                    - tmp1 * njac(2,5,j-1);
			
 
				+
			
 
				+               lhsa(3,1,j) = - tmp2 * fjac(3,1,j-1)
			
 
				+                    - tmp1 * njac(3,1,j-1);
			
 
				+               lhsa(3,2,j) = - tmp2 * fjac(3,2,j-1)
			
 
				+                    - tmp1 * njac(3,2,j-1);
			
 
				+               lhsa(3,3,j) = - tmp2 * fjac(3,3,j-1)
			
 
				+                    - tmp1 * njac(3,3,j-1)
			
 
				+                    - tmp1 * dy3 ;
			
 
				+               lhsa(3,4,j) = - tmp2 * fjac(3,4,j-1)
			
 
				+                    - tmp1 * njac(3,4,j-1);
			
 
				+               lhsa(3,5,j) = - tmp2 * fjac(3,5,j-1)
			
 
				+                    - tmp1 * njac(3,5,j-1);
			
 
				+
			
 
				+               lhsa(4,1,j) = - tmp2 * fjac(4,1,j-1)
			
 
				+                    - tmp1 * njac(4,1,j-1);
			
 
				+               lhsa(4,2,j) = - tmp2 * fjac(4,2,j-1)
			
 
				+                    - tmp1 * njac(4,2,j-1);
			
 
				+               lhsa(4,3,j) = - tmp2 * fjac(4,3,j-1)
			
 
				+                    - tmp1 * njac(4,3,j-1);
			
 
				+               lhsa(4,4,j) = - tmp2 * fjac(4,4,j-1)
			
 
				+                    - tmp1 * njac(4,4,j-1)
			
 
				+                    - tmp1 * dy4;
			
 
				+               lhsa(4,5,j) = - tmp2 * fjac(4,5,j-1)
			
 
				+                    - tmp1 * njac(4,5,j-1);
			
 
				+
			
 
				+               lhsa(5,1,j) = - tmp2 * fjac(5,1,j-1)
			
 
				+                    - tmp1 * njac(5,1,j-1);
			
 
				+               lhsa(5,2,j) = - tmp2 * fjac(5,2,j-1)
			
 
				+                    - tmp1 * njac(5,2,j-1);
			
 
				+               lhsa(5,3,j) = - tmp2 * fjac(5,3,j-1)
			
 
				+                    - tmp1 * njac(5,3,j-1);
			
 
				+               lhsa(5,4,j) = - tmp2 * fjac(5,4,j-1)
			
 
				+                    - tmp1 * njac(5,4,j-1);
			
 
				+               lhsa(5,5,j) = - tmp2 * fjac(5,5,j-1)
			
 
				+                    - tmp1 * njac(5,5,j-1)
			
 
				+                    - tmp1 * dy5;
			
 
				+
			
 
				+               lhsb(1,1,j) = 1.0e+00
			
 
				+                    + tmp1 * 2.0e+00 * njac(1,1,j)
			
 
				+                    + tmp1 * 2.0e+00 * dy1;
			
 
				+               lhsb(1,2,j) = tmp1 * 2.0e+00 * njac(1,2,j);
			
 
				+               lhsb(1,3,j) = tmp1 * 2.0e+00 * njac(1,3,j);
			
 
				+               lhsb(1,4,j) = tmp1 * 2.0e+00 * njac(1,4,j);
			
 
				+               lhsb(1,5,j) = tmp1 * 2.0e+00 * njac(1,5,j);
			
 
				+
			
 
				+               lhsb(2,1,j) = tmp1 * 2.0e+00 * njac(2,1,j);
			
 
				+               lhsb(2,2,j) = 1.0e+00
			
 
				+                    + tmp1 * 2.0e+00 * njac(2,2,j)
			
 
				+                    + tmp1 * 2.0e+00 * dy2;
			
 
				+               lhsb(2,3,j) = tmp1 * 2.0e+00 * njac(2,3,j);
			
 
				+               lhsb(2,4,j) = tmp1 * 2.0e+00 * njac(2,4,j);
			
 
				+               lhsb(2,5,j) = tmp1 * 2.0e+00 * njac(2,5,j);
			
 
				+
			
 
				+               lhsb(3,1,j) = tmp1 * 2.0e+00 * njac(3,1,j);
			
 
				+               lhsb(3,2,j) = tmp1 * 2.0e+00 * njac(3,2,j);
			
 
				+               lhsb(3,3,j) = 1.0e+00
			
 
				+                    + tmp1 * 2.0e+00 * njac(3,3,j)
			
 
				+                    + tmp1 * 2.0e+00 * dy3;
			
 
				+               lhsb(3,4,j) = tmp1 * 2.0e+00 * njac(3,4,j);
			
 
				+               lhsb(3,5,j) = tmp1 * 2.0e+00 * njac(3,5,j);
			
 
				+
			
 
				+               lhsb(4,1,j) = tmp1 * 2.0e+00 * njac(4,1,j);
			
 
				+               lhsb(4,2,j) = tmp1 * 2.0e+00 * njac(4,2,j);
			
 
				+               lhsb(4,3,j) = tmp1 * 2.0e+00 * njac(4,3,j);
			
 
				+               lhsb(4,4,j) = 1.0e+00
			
 
				+                    + tmp1 * 2.0e+00 * njac(4,4,j)
			
 
				+                    + tmp1 * 2.0e+00 * dy4;
			
 
				+               lhsb(4,5,j) = tmp1 * 2.0e+00 * njac(4,5,j);
			
 
				+
			
 
				+               lhsb(5,1,j) = tmp1 * 2.0e+00 * njac(5,1,j);
			
 
				+               lhsb(5,2,j) = tmp1 * 2.0e+00 * njac(5,2,j);
			
 
				+               lhsb(5,3,j) = tmp1 * 2.0e+00 * njac(5,3,j);
			
 
				+               lhsb(5,4,j) = tmp1 * 2.0e+00 * njac(5,4,j);
			
 
				+               lhsb(5,5,j) = 1.0e+00
			
 
				+                    + tmp1 * 2.0e+00 * njac(5,5,j) 
			
 
				+                    + tmp1 * 2.0e+00 * dy5;
			
 
				+
			
 
				+               lhsc(1,1,i,j,k,c) =  tmp2 * fjac(1,1,j+1)
			
 
				+                    - tmp1 * njac(1,1,j+1)
			
 
				+                    - tmp1 * dy1;
			
 
				+               lhsc(1,2,i,j,k,c) =  tmp2 * fjac(1,2,j+1)
			
 
				+                    - tmp1 * njac(1,2,j+1);
			
 
				+               lhsc(1,3,i,j,k,c) =  tmp2 * fjac(1,3,j+1)
			
 
				+                    - tmp1 * njac(1,3,j+1);
			
 
				+               lhsc(1,4,i,j,k,c) =  tmp2 * fjac(1,4,j+1)
			
 
				+                    - tmp1 * njac(1,4,j+1);
			
 
				+               lhsc(1,5,i,j,k,c) =  tmp2 * fjac(1,5,j+1)
			
 
				+                    - tmp1 * njac(1,5,j+1);
			
 
				+
			
 
				+               lhsc(2,1,i,j,k,c) =  tmp2 * fjac(2,1,j+1)
			
 
				+                    - tmp1 * njac(2,1,j+1);
			
 
				+               lhsc(2,2,i,j,k,c) =  tmp2 * fjac(2,2,j+1)
			
 
				+                    - tmp1 * njac(2,2,j+1)
			
 
				+                    - tmp1 * dy2;
			
 
				+               lhsc(2,3,i,j,k,c) =  tmp2 * fjac(2,3,j+1)
			
 
				+                    - tmp1 * njac(2,3,j+1);
			
 
				+               lhsc(2,4,i,j,k,c) =  tmp2 * fjac(2,4,j+1)
			
 
				+                    - tmp1 * njac(2,4,j+1);
			
 
				+               lhsc(2,5,i,j,k,c) =  tmp2 * fjac(2,5,j+1)
			
 
				+                    - tmp1 * njac(2,5,j+1);
			
 
				+
			
 
				+               lhsc(3,1,i,j,k,c) =  tmp2 * fjac(3,1,j+1)
			
 
				+                    - tmp1 * njac(3,1,j+1);
			
 
				+               lhsc(3,2,i,j,k,c) =  tmp2 * fjac(3,2,j+1)
			
 
				+                    - tmp1 * njac(3,2,j+1);
			
 
				+               lhsc(3,3,i,j,k,c) =  tmp2 * fjac(3,3,j+1)
			
 
				+                    - tmp1 * njac(3,3,j+1)
			
 
				+                    - tmp1 * dy3;
			
 
				+               lhsc(3,4,i,j,k,c) =  tmp2 * fjac(3,4,j+1)
			
 
				+                    - tmp1 * njac(3,4,j+1);
			
 
				+               lhsc(3,5,i,j,k,c) =  tmp2 * fjac(3,5,j+1)
			
 
				+                    - tmp1 * njac(3,5,j+1);
			
 
				+
			
 
				+               lhsc(4,1,i,j,k,c) =  tmp2 * fjac(4,1,j+1)
			
 
				+                    - tmp1 * njac(4,1,j+1);
			
 
				+               lhsc(4,2,i,j,k,c) =  tmp2 * fjac(4,2,j+1)
			
 
				+                    - tmp1 * njac(4,2,j+1);
			
 
				+               lhsc(4,3,i,j,k,c) =  tmp2 * fjac(4,3,j+1)
			
 
				+                    - tmp1 * njac(4,3,j+1);
			
 
				+               lhsc(4,4,i,j,k,c) =  tmp2 * fjac(4,4,j+1)
			
 
				+                    - tmp1 * njac(4,4,j+1)
			
 
				+                    - tmp1 * dy4;
			
 
				+               lhsc(4,5,i,j,k,c) =  tmp2 * fjac(4,5,j+1)
			
 
				+                    - tmp1 * njac(4,5,j+1);
			
 
				+
			
 
				+               lhsc(5,1,i,j,k,c) =  tmp2 * fjac(5,1,j+1)
			
 
				+                    - tmp1 * njac(5,1,j+1);
			
 
				+               lhsc(5,2,i,j,k,c) =  tmp2 * fjac(5,2,j+1)
			
 
				+                    - tmp1 * njac(5,2,j+1);
			
 
				+               lhsc(5,3,i,j,k,c) =  tmp2 * fjac(5,3,j+1)
			
 
				+                    - tmp1 * njac(5,3,j+1);
			
 
				+               lhsc(5,4,i,j,k,c) =  tmp2 * fjac(5,4,j+1)
			
 
				+                    - tmp1 * njac(5,4,j+1);
			
 
				+               lhsc(5,5,i,j,k,c) =  tmp2 * fjac(5,5,j+1)
			
 
				+                    - tmp1 * njac(5,5,j+1)
			
 
				+                    - tmp1 * dy5;
			
 
				+
			
 
				+            }
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     outer most do loops - sweeping in i direction
			
 
				+//---------------------------------------------------------------------
			
 
				+            if (first == 1) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     multiply c(i,jstart,k) by b_inverse and copy back to c
			
 
				+//     multiply rhs(jstart) by b_inverse(jstart) and copy to rhs
			
 
				+//---------------------------------------------------------------------
			
 
				+               binvcrhs( &lhsb(1,1,jstart),
			
 
				+                              &lhsc(1,1,i,jstart,k,c),
			
 
				+                              &rhs(1,i,jstart,k,c) );
			
 
				+
			
 
				+            }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     begin inner most do loop
			
 
				+//     do all the elements of the cell unless last 
			
 
				+//---------------------------------------------------------------------
			
 
				+            for (j = jstart+first; j <= jsize-last; j++) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     subtract A*lhs_vector(j-1) from lhs_vector(j)
			
 
				+//     
			
 
				+//     rhs(j) = rhs(j) - A*rhs(j-1)
			
 
				+//---------------------------------------------------------------------
			
 
				+               matvec_sub(&lhsa(1,1,j),
			
 
				+                               &rhs(1,i,j-1,k,c),&rhs(1,i,j,k,c));
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     B(j) = B(j) - C(j-1)*A(j)
			
 
				+//---------------------------------------------------------------------
			
 
				+               matmul_sub(&lhsa(1,1,j),
			
 
				+                               &lhsc(1,1,i,j-1,k,c),
			
 
				+                               &lhsb(1,1,j));
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     multiply c(i,j,k) by b_inverse and copy back to c
			
 
				+//     multiply rhs(i,1,k) by b_inverse(i,1,k) and copy to rhs
			
 
				+//---------------------------------------------------------------------
			
 
				+               binvcrhs( &lhsb(1,1,j),
			
 
				+                              &lhsc(1,1,i,j,k,c),
			
 
				+                              &rhs(1,i,j,k,c) );
			
 
				+
			
 
				+            }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     Now finish up special cases for last cell
			
 
				+//---------------------------------------------------------------------
			
 
				+            if (last == 1) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     rhs(jsize) = rhs(jsize) - A*rhs(jsize-1)
			
 
				+//---------------------------------------------------------------------
			
 
				+               matvec_sub(&lhsa(1,1,jsize),
			
 
				+                               &rhs(1,i,jsize-1,k,c),&rhs(1,i,jsize,k,c));
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     B(jsize) = B(jsize) - C(jsize-1)*A(jsize)
			
 
				+//     call matmul_sub(aa,i,jsize,k,c,
			
 
				+//     $              cc,i,jsize-1,k,c,bb,i,jsize,k,c)
			
 
				+//---------------------------------------------------------------------
			
 
				+               matmul_sub(&lhsa(1,1,jsize),
			
 
				+                               &lhsc(1,1,i,jsize-1,k,c),
			
 
				+                               &lhsb(1,1,jsize));
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     multiply rhs(jsize) by b_inverse(jsize) and copy to rhs
			
 
				+//---------------------------------------------------------------------
			
 
				+               binvrhs( &lhsb(1,1,jsize),
			
 
				+                             &rhs(1,i,jsize,k,c) );
			
 
				+
			
 
				+            }
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+      
			
 
				+
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/.svn/text-base/z_solve.c.svn-base
+++ b/RCCE_V2.0/apps/NPB/BT/.svn/text-base/z_solve.c.svn-base
@@ -0,0 +1,689 @@
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include "header.h"
			
 
				+#include "mpinpb.h"
			
 
				+#include "work_lhs.h"
			
 
				+
			
 
				+extern void z_sendrecv_solve(int c, int cprev); // packs C'/rhs' of cell cprev, sends/receives, unpacks into cell c
			
 
				+extern void z_sendrecv_back(int c, int cprev); // packs rhs plane 0 of cell cprev, sends/receives, unpacks into backsub_info of cell c
			
 
				+extern void z_backsubstitute(int first, int last, int c); // back-substitution sweep over cell c
			
 
				+extern void z_solve_cell(int first, int last, int c); // Gaussian elimination on cell c
			
 
				+
			
 
				+void z_solve() {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     Performs line solves in Z direction by first factoring
			
 
				+//     the block-tridiagonal matrix into an upper triangular matrix,
			
 
				+//     and then performing back substitution to solve for the unknown
			
 
				+//     vectors of each line.
			
 
				+//     Forward pass: stages 1..ncells call z_solve_cell (after z_sendrecv_solve when stage > 1).
			
 
				+//     Backward pass: stages ncells..1 call z_backsubstitute (after z_sendrecv_back when stage < ncells).
			
 
				+//     Make sure we treat elements zero to cell_size in the direction of the sweep.
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int  c, cprev, stage, first, last, error; // NOTE(review): 'error' is declared but never used in this function
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     in our terminology stage is the number of the cell in the z-direction
			
 
				+//     i.e. stage = 1 means the start of the line, stage = ncells means the end
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (stage = 1; stage <= ncells; stage++) {
			
 
				+         c = slice(3,stage); // cell handled at this stage, looked up via slice(3,stage)
			
 
				+//---------------------------------------------------------------------
			
 
				+//     set first-cell and last-cell flags
			
 
				+//---------------------------------------------------------------------
			
 
				+         first = (stage == 1);
			
 
				+         last =  (stage == ncells);
			
 
				+
			
 
				+        if (stage >1) { // later stages first pull C' and rhs' from the previous stage's cell
			
 
				+           cprev = slice(3,stage-1);
			
 
				+           z_sendrecv_solve(c, cprev);
			
 
				+        }
			
 
				+        z_solve_cell(first,last,c);
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     now perform backsubstitution in reverse direction (stage = ncells down to 1)
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (stage = ncells; stage >= 1; stage--) {
			
 
				+         c = slice(3,stage);
			
 
				+         first = (stage == 1);
			
 
				+         last =  (stage == ncells);
			
 
				+
			
 
				+         if (stage <ncells) { // stage+1 was handled in the previous iteration; fetch its plane-0 result
			
 
				+            cprev = slice(3,stage+1);
			
 
				+            z_sendrecv_back(c, cprev);
			
 
				+         }
			
 
				+
			
 
				+         z_backsubstitute(first,last,c);
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+      
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+      
			
 
				+void z_unpack_solve_info(int c) {
			
 
				+//---------------------------------------------------------------------
			
 
				+//     NOTE(review): z_sendrecv_solve repeats this same unpack loop inline.
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     unpack C'(-1) and rhs'(-1) for all i and j: copy out_buffer into
			
 
				+//     lhsc(:,:,i,j,kstart-1,c) and rhs(:,i,j,kstart-1,c), with kstart = 0
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int i,j,m,n,ptr,kstart ;
			
 
				+
			
 
				+      kstart = 0;
			
 
				+      ptr = 0; // running offset into out_buffer
			
 
				+      for (j = 0; j <= JMAX-1; j++) {
			
 
				+         for (i = 0; i <= IMAX-1; i++) {
			
 
				+            for (m = 1; m <= BLOCK_SIZE; m++) {
			
 
				+               for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+                  lhsc(m,n,i,j,kstart-1,c) = out_buffer(ptr+n); // row m of the BLOCK_SIZE x BLOCK_SIZE block
			
 
				+               }
			
 
				+               ptr = ptr+BLOCK_SIZE; // advance past the row just copied
			
 
				+            }
			
 
				+            for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+               rhs(n,i,j,kstart-1,c) = out_buffer(ptr+n); // NOTE(review): n starts at 1, so out_buffer(...) is presumably a 1-based accessor - confirm
			
 
				+            }
			
 
				+            ptr = ptr+BLOCK_SIZE; // advance past the rhs vector
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+      
			
 
				+void z_sendrecv_solve(int c, int cprev) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     pack up C'(ksize) and rhs'(ksize) of cell cprev (ksize = cell_size(3,cprev)-1) for all
			
 
				+//     i and j, send/receive them, then unpack the received data into plane k = -1 of cell c
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int i,j,m,n,ksize,ptr,kstart;
			
 
				+      int phase;
			
 
				+      int error,buffer_size; // NOTE(review): 'error' is never used; RCCE_send/RCCE_recv results are not checked
			
 
				+
			
 
				+      ksize = cell_size(3,cprev)-1;
			
 
				+      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
			
 
				+           (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE); // MAX_CELL_DIM^2 points x (BLOCK_SIZE^2 block + BLOCK_SIZE rhs) entries
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     pack up buffer: BLOCK_SIZE*BLOCK_SIZE lhsc entries, then BLOCK_SIZE rhs entries, per (i,j)
			
 
				+//---------------------------------------------------------------------
			
 
				+      ptr = 0;
			
 
				+      for (j = 0; j <= JMAX-1; j++) {
			
 
				+         for (i = 0; i <= IMAX-1; i++) {
			
 
				+            for (m = 1; m <= BLOCK_SIZE; m++) {
			
 
				+               for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+                  in_buffer(ptr+n) = lhsc(m,n,i,j,ksize,cprev);
			
 
				+               }
			
 
				+               ptr = ptr+BLOCK_SIZE;
			
 
				+            }
			
 
				+            for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+               in_buffer(ptr+n) = rhs(n,i,j,ksize,cprev);
			
 
				+            }
			
 
				+            ptr = ptr+BLOCK_SIZE;
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     send and receive buffer; send_color/recv_color[TOPDIR] select the phase (0..2) in which the send / receive is issued
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      for (phase = 0; phase < 3; phase++) {
			
 
				+
			
 
				+        if (send_color[TOPDIR]==phase) 
			
 
				+          RCCE_send((char*)in_buffer, buffer_size*sizeof(double), successor(3)); // length is given in bytes
			
 
				+        if (recv_color[TOPDIR]==phase) 
			
 
				+          RCCE_recv((char*)out_buffer, buffer_size*sizeof(double), predecessor(3));
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     unpack buffer into plane kstart-1 (= -1) of cell c, in the same order it was packed
			
 
				+//---------------------------------------------------------------------
			
 
				+      kstart = 0;
			
 
				+      ptr = 0;
			
 
				+      for (j = 0; j <= JMAX-1; j++) {
			
 
				+         for (i = 0; i <= IMAX-1; i++) {
			
 
				+            for (m = 1; m <= BLOCK_SIZE; m++) {
			
 
				+               for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+                  lhsc(m,n,i,j,kstart-1,c) = out_buffer(ptr+n);
			
 
				+               }
			
 
				+               ptr = ptr+BLOCK_SIZE;
			
 
				+            }
			
 
				+            for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+               rhs(n,i,j,kstart-1,c) = out_buffer(ptr+n);
			
 
				+            }
			
 
				+            ptr = ptr+BLOCK_SIZE;
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+void z_sendrecv_back(int c, int cprev) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     pack up and send U(kstart) of cell cprev for all i and j; received data goes to backsub_info of cell c
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int i,j,n,ptr,kstart;
			
 
				+      int phase;
			
 
				+      int error,buffer_size; // NOTE(review): 'error' is never used; RCCE_send/RCCE_recv results are not checked
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     Pack element 0 (k = kstart) of rhs; it is sent to predecessor(3) below
			
 
				+//---------------------------------------------------------------------
			
 
				+      kstart = 0;
			
 
				+      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE; // MAX_CELL_DIM^2 points x BLOCK_SIZE entries
			
 
				+      ptr = 0;
			
 
				+      for (j = 0; j <= JMAX-1; j++) {
			
 
				+         for (i = 0; i <= IMAX-1; i++) {
			
 
				+            for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+               in_buffer(ptr+n) = rhs(n,i,j,kstart,cprev);
			
 
				+            }
			
 
				+            ptr = ptr+BLOCK_SIZE;
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     send and receive buffer; send_color/recv_color[BOTTOMDIR] select the phase (0..2) in which the send / receive is issued
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      for (phase = 0; phase < 3; phase++) {
			
 
				+
			
 
				+        if (send_color[BOTTOMDIR]==phase) 
			
 
				+          RCCE_send((char*)in_buffer, buffer_size*sizeof(double), predecessor(3)); // length is given in bytes
			
 
				+        if (recv_color[BOTTOMDIR]==phase) 
			
 
				+          RCCE_recv((char*)out_buffer, buffer_size*sizeof(double), successor(3));
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     unpack the received vectors into backsub_info(:,i,j,c) for all i and j (read by z_backsubstitute)
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      ptr = 0;
			
 
				+      for (j = 0; j <= JMAX-1; j++) {
			
 
				+         for (i = 0; i <= IMAX-1; i++) {
			
 
				+            for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+               backsub_info(n,i,j,c) = out_buffer(ptr+n);
			
 
				+            }
			
 
				+            ptr = ptr+BLOCK_SIZE;
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+void z_backsubstitute(int first, int last, int c) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     first: not read in this routine; last: 0 means backsub_info is applied first; c: cell index
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     back solve: if last cell, U(ksize) is simply rhs(ksize);
			
 
				+//     otherwise rhs(ksize) -= lhsc(ksize)*backsub_info, where backsub_info
			
 
				+//     is filled by z_sendrecv_back.  Then sweep k = ksize-1 down to kstart
			
 
				+//     with rhs(k) -= lhsc(k)*rhs(k+1).  After the call u(kstart) will be sent to next cell
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int i, k;
			
 
				+      int m,n,j,jsize,isize,ksize,kstart;
			
 
				+      
			
 
				+      kstart = 0;
			
 
				+      isize = cell_size(1,c)-end(1,c)-1      ;
			
 
				+      jsize = cell_size(2,c)-end(2,c)-1;
			
 
				+      ksize = cell_size(3,c)-1;
			
 
				+      if (last == 0) { // not the last cell: fold in the data received from the following cell
			
 
				+         for (j = start(2,c); j <= jsize; j++) {
			
 
				+            for (i = start(1,c); i <= isize; i++) {
			
 
				+//---------------------------------------------------------------------
			
 
				+//     U(ksize) uses info from previous cell if not last cell
			
 
				+//---------------------------------------------------------------------
			
 
				+               for (m = 1; m <= BLOCK_SIZE; m++) {
			
 
				+                  for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+                     rhs(m,i,j,ksize,c) = rhs(m,i,j,ksize,c) 
			
 
				+                          - lhsc(m,n,i,j,ksize,c)*
			
 
				+                          backsub_info(n,i,j,c);
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+      }
			
 
				+      for (k = ksize-1; k >= kstart; k--) { // descending k: plane k+1 is final before plane k reads it
			
 
				+         for (j = start(2,c); j <= jsize; j++) {
			
 
				+            for (i = start(1,c); i <= isize; i++) {
			
 
				+               for (m = 1; m <= BLOCK_SIZE; m++) {
			
 
				+                  for (n = 1; n <= BLOCK_SIZE; n++) {
			
 
				+                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) 
			
 
				+                          - lhsc(m,n,i,j,k,c)*rhs(n,i,j,k+1,c);
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+void z_solve_cell(int first,int last,int c) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     performs Gaussian elimination on this cell.
			
 
				+//     
			
 
				+//     assumes that unpacking routines for non-first cells 
			
 
				+//     preload C' and rhs' from previous cell.
			
 
				+//     
			
 
				+//     assumed send happens outside this routine, but that
			
 
				+//     c'(KMAX) and rhs'(KMAX) will be sent to next cell.
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int i,j,k,isize,ksize,jsize,kstart;
			
 
				+      double utmp[6*(KMAX+4)];
			
 
				+#define utmp(m,i) utmp[(m-1)+6*(i+2)]
			
 
				+
			
 
				+      kstart = 0;
			
 
				+      isize = cell_size(1,c)-end(1,c)-1;
			
 
				+      jsize = cell_size(2,c)-end(2,c)-1;
			
 
				+      ksize = cell_size(3,c)-1;
			
 
				+
			
 
				+      lhsabinit(lhsa, lhsb, ksize);
			
 
				+
			
 
				+      for (j = start(2,c); j <= jsize; j++) {
			
 
				+         for (i = start(1,c); i <= isize; i++) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     This function computes the left hand side for the three z-factors 
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     Compute the indices for storing the block-diagonal matrix;
			
 
				+//     determine c (labeled f) and s jacobians for cell c
			
 
				+//---------------------------------------------------------------------
			
 
				+            for (k = start(3,c)-1; k <= cell_size(3,c)-end(3,c); k++) {
			
 
				+               utmp(1,k) = 1.0e0 / u(1,i,j,k,c);
			
 
				+               utmp(2,k) = u(2,i,j,k,c);
			
 
				+               utmp(3,k) = u(3,i,j,k,c);
			
 
				+               utmp(4,k) = u(4,i,j,k,c);
			
 
				+               utmp(5,k) = u(5,i,j,k,c);
			
 
				+               utmp(6,k) = qs(i,j,k,c);
			
 
				+            }
			
 
				+
			
 
				+            for (k = start(3,c)-1; k <= cell_size(3,c)-end(3,c); k++) {
			
 
				+
			
 
				+               tmp1 = utmp(1,k);
			
 
				+               tmp2 = tmp1 * tmp1;
			
 
				+               tmp3 = tmp1 * tmp2;
			
 
				+
			
 
				+               fjac(1,1,k) = 0.0e+00;
			
 
				+               fjac(1,2,k) = 0.0e+00;
			
 
				+               fjac(1,3,k) = 0.0e+00;
			
 
				+               fjac(1,4,k) = 1.0e+00;
			
 
				+               fjac(1,5,k) = 0.0e+00;
			
 
				+
			
 
				+               fjac(2,1,k) = - ( utmp(2,k)*utmp(4,k) ) 
			
 
				+                    * tmp2 ;
			
 
				+               fjac(2,2,k) = utmp(4,k) * tmp1;
			
 
				+               fjac(2,3,k) = 0.0e+00;
			
 
				+               fjac(2,4,k) = utmp(2,k) * tmp1;
			
 
				+               fjac(2,5,k) = 0.0e+00;
			
 
				+
			
 
				+               fjac(3,1,k) = - ( utmp(3,k)*utmp(4,k) )
			
 
				+                    * tmp2 ;
			
 
				+               fjac(3,2,k) = 0.0e+00;
			
 
				+               fjac(3,3,k) = utmp(4,k) * tmp1;
			
 
				+               fjac(3,4,k) = utmp(3,k) * tmp1;
			
 
				+               fjac(3,5,k) = 0.0e+00;
			
 
				+
			
 
				+               fjac(4,1,k) = - (utmp(4,k)*utmp(4,k) * tmp2 ) 
			
 
				+                    + c2 * utmp(6,k);
			
 
				+               fjac(4,2,k) = - c2 *  utmp(2,k) * tmp1 ;
			
 
				+               fjac(4,3,k) = - c2 *  utmp(3,k) * tmp1;
			
 
				+               fjac(4,4,k) = ( 2.0e+00 - c2 )
			
 
				+                    *  utmp(4,k) * tmp1 ;
			
 
				+               fjac(4,5,k) = c2;
			
 
				+
			
 
				+               fjac(5,1,k) = ( c2 * 2.0e0 * utmp(6,k)
			
 
				+                    - c1 * ( utmp(5,k) * tmp1 ) )
			
 
				+                    * ( utmp(4,k) * tmp1 );
			
 
				+               fjac(5,2,k) = - c2 * ( utmp(2,k)*utmp(4,k) )
			
 
				+                    * tmp2 ;
			
 
				+               fjac(5,3,k) = - c2 * ( utmp(3,k)*utmp(4,k) )
			
 
				+                    * tmp2;
			
 
				+               fjac(5,4,k) = c1 * ( utmp(5,k) * tmp1 )
			
 
				+                    - c2 * ( utmp(6,k)
			
 
				+                    + utmp(4,k)*utmp(4,k) * tmp2 );
			
 
				+               fjac(5,5,k) = c1 * utmp(4,k) * tmp1;
			
 
				+
			
 
				+               njac(1,1,k) = 0.0e+00;
			
 
				+               njac(1,2,k) = 0.0e+00;
			
 
				+               njac(1,3,k) = 0.0e+00;
			
 
				+               njac(1,4,k) = 0.0e+00;
			
 
				+               njac(1,5,k) = 0.0e+00;
			
 
				+
			
 
				+               njac(2,1,k) = - c3c4 * tmp2 * utmp(2,k);
			
 
				+               njac(2,2,k) =   c3c4 * tmp1;
			
 
				+               njac(2,3,k) =   0.0e+00;
			
 
				+               njac(2,4,k) =   0.0e+00;
			
 
				+               njac(2,5,k) =   0.0e+00;
			
 
				+
			
 
				+               njac(3,1,k) = - c3c4 * tmp2 * utmp(3,k);
			
 
				+               njac(3,2,k) =   0.0e+00;
			
 
				+               njac(3,3,k) =   c3c4 * tmp1;
			
 
				+               njac(3,4,k) =   0.0e+00;
			
 
				+               njac(3,5,k) =   0.0e+00;
			
 
				+
			
 
				+               njac(4,1,k) = - con43 * c3c4 * tmp2 * utmp(4,k);
			
 
				+               njac(4,2,k) =   0.0e+00;
			
 
				+               njac(4,3,k) =   0.0e+00;
			
 
				+               njac(4,4,k) =   con43 * c3 * c4 * tmp1;
			
 
				+               njac(4,5,k) =   0.0e+00;
			
 
				+
			
 
				+               njac(5,1,k) = - (  c3c4
			
 
				+                    - c1345 ) * tmp3 * SQR(utmp(2,k))
			
 
				+                    - ( c3c4 - c1345 ) * tmp3 * SQR(utmp(3,k))
			
 
				+                    - ( con43 * c3c4
			
 
				+                    - c1345 ) * tmp3 * SQR(utmp(4,k))
			
 
				+                    - c1345 * tmp2 * utmp(5,k);
			
 
				+
			
 
				+               njac(5,2,k) = (  c3c4 - c1345 ) * tmp2 * utmp(2,k);
			
 
				+               njac(5,3,k) = (  c3c4 - c1345 ) * tmp2 * utmp(3,k);
			
 
				+               njac(5,4,k) = ( con43 * c3c4
			
 
				+                    - c1345 ) * tmp2 * utmp(4,k);
			
 
				+               njac(5,5,k) = ( c1345 )* tmp1;
			
 
				+
			
 
				+
			
 
				+            }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     now jacobians set, so form left hand side in z direction
			
 
				+//---------------------------------------------------------------------
			
 
				+            for (k = start(3,c); k <= ksize-end(3,c); k++) {
			
 
				+
			
 
				+               tmp1 = dt * tz1;
			
 
				+               tmp2 = dt * tz2;
			
 
				+
			
 
				+               lhsa(1,1,k) = - tmp2 * fjac(1,1,k-1)
			
 
				+                    - tmp1 * njac(1,1,k-1)
			
 
				+                    - tmp1 * dz1 ;
			
 
				+               lhsa(1,2,k) = - tmp2 * fjac(1,2,k-1)
			
 
				+                    - tmp1 * njac(1,2,k-1);
			
 
				+               lhsa(1,3,k) = - tmp2 * fjac(1,3,k-1)
			
 
				+                    - tmp1 * njac(1,3,k-1);
			
 
				+               lhsa(1,4,k) = - tmp2 * fjac(1,4,k-1)
			
 
				+                    - tmp1 * njac(1,4,k-1);
			
 
				+               lhsa(1,5,k) = - tmp2 * fjac(1,5,k-1)
			
 
				+                    - tmp1 * njac(1,5,k-1);
			
 
				+
			
 
				+               lhsa(2,1,k) = - tmp2 * fjac(2,1,k-1)
			
 
				+                    - tmp1 * njac(2,1,k-1);
			
 
				+               lhsa(2,2,k) = - tmp2 * fjac(2,2,k-1)
			
 
				+                    - tmp1 * njac(2,2,k-1)
			
 
				+                    - tmp1 * dz2;
			
 
				+               lhsa(2,3,k) = - tmp2 * fjac(2,3,k-1)
			
 
				+                    - tmp1 * njac(2,3,k-1);
			
 
				+               lhsa(2,4,k) = - tmp2 * fjac(2,4,k-1)
			
 
				+                    - tmp1 * njac(2,4,k-1);
			
 
				+               lhsa(2,5,k) = - tmp2 * fjac(2,5,k-1)
			
 
				+                    - tmp1 * njac(2,5,k-1);
			
 
				+
			
 
				+               lhsa(3,1,k) = - tmp2 * fjac(3,1,k-1)
			
 
				+                    - tmp1 * njac(3,1,k-1);
			
 
				+               lhsa(3,2,k) = - tmp2 * fjac(3,2,k-1)
			
 
				+                    - tmp1 * njac(3,2,k-1);
			
 
				+               lhsa(3,3,k) = - tmp2 * fjac(3,3,k-1)
			
 
				+                    - tmp1 * njac(3,3,k-1)
			
 
				+                    - tmp1 * dz3 ;
			
 
				+               lhsa(3,4,k) = - tmp2 * fjac(3,4,k-1)
			
 
				+                    - tmp1 * njac(3,4,k-1);
			
 
				+               lhsa(3,5,k) = - tmp2 * fjac(3,5,k-1)
			
 
				+                    - tmp1 * njac(3,5,k-1);
			
 
				+
			
 
				+               lhsa(4,1,k) = - tmp2 * fjac(4,1,k-1)
			
 
				+                    - tmp1 * njac(4,1,k-1);
			
 
				+               lhsa(4,2,k) = - tmp2 * fjac(4,2,k-1)
			
 
				+                    - tmp1 * njac(4,2,k-1);
			
 
				+               lhsa(4,3,k) = - tmp2 * fjac(4,3,k-1)
			
 
				+                    - tmp1 * njac(4,3,k-1);
			
 
				+               lhsa(4,4,k) = - tmp2 * fjac(4,4,k-1)
			
 
				+                    - tmp1 * njac(4,4,k-1)
			
 
				+                    - tmp1 * dz4;
			
 
				+               lhsa(4,5,k) = - tmp2 * fjac(4,5,k-1)
			
 
				+                    - tmp1 * njac(4,5,k-1);
			
 
				+
			
 
				+               lhsa(5,1,k) = - tmp2 * fjac(5,1,k-1)
			
 
				+                    - tmp1 * njac(5,1,k-1);
			
 
				+               lhsa(5,2,k) = - tmp2 * fjac(5,2,k-1)
			
 
				+                    - tmp1 * njac(5,2,k-1);
			
 
				+               lhsa(5,3,k) = - tmp2 * fjac(5,3,k-1)
			
 
				+                    - tmp1 * njac(5,3,k-1);
			
 
				+               lhsa(5,4,k) = - tmp2 * fjac(5,4,k-1)
			
 
				+                    - tmp1 * njac(5,4,k-1);
			
 
				+               lhsa(5,5,k) = - tmp2 * fjac(5,5,k-1)
			
 
				+                    - tmp1 * njac(5,5,k-1)
			
 
				+                    - tmp1 * dz5;
			
 
				+
			
 
				+               lhsb(1,1,k) = 1.0e+00
			
 
				+                    + tmp1 * 2.0e+00 * njac(1,1,k)
			
 
				+                    + tmp1 * 2.0e+00 * dz1;
			
 
				+               lhsb(1,2,k) = tmp1 * 2.0e+00 * njac(1,2,k);
			
 
				+               lhsb(1,3,k) = tmp1 * 2.0e+00 * njac(1,3,k);
			
 
				+               lhsb(1,4,k) = tmp1 * 2.0e+00 * njac(1,4,k);
			
 
				+               lhsb(1,5,k) = tmp1 * 2.0e+00 * njac(1,5,k);
			
 
				+
			
 
				+               lhsb(2,1,k) = tmp1 * 2.0e+00 * njac(2,1,k);
			
 
				+               lhsb(2,2,k) = 1.0e+00
			
 
				+                    + tmp1 * 2.0e+00 * njac(2,2,k)
			
 
				+                    + tmp1 * 2.0e+00 * dz2;
			
 
				+               lhsb(2,3,k) = tmp1 * 2.0e+00 * njac(2,3,k);
			
 
				+               lhsb(2,4,k) = tmp1 * 2.0e+00 * njac(2,4,k);
			
 
				+               lhsb(2,5,k) = tmp1 * 2.0e+00 * njac(2,5,k);
			
 
				+
			
 
				+               lhsb(3,1,k) = tmp1 * 2.0e+00 * njac(3,1,k);
			
 
				+               lhsb(3,2,k) = tmp1 * 2.0e+00 * njac(3,2,k);
			
 
				+               lhsb(3,3,k) = 1.0e+00
			
 
				+                    + tmp1 * 2.0e+00 * njac(3,3,k)
			
 
				+                    + tmp1 * 2.0e+00 * dz3;
			
 
				+               lhsb(3,4,k) = tmp1 * 2.0e+00 * njac(3,4,k);
			
 
				+               lhsb(3,5,k) = tmp1 * 2.0e+00 * njac(3,5,k);
			
 
				+
			
 
				+               lhsb(4,1,k) = tmp1 * 2.0e+00 * njac(4,1,k);
			
 
				+               lhsb(4,2,k) = tmp1 * 2.0e+00 * njac(4,2,k);
			
 
				+               lhsb(4,3,k) = tmp1 * 2.0e+00 * njac(4,3,k);
			
 
				+               lhsb(4,4,k) = 1.0e+00
			
 
				+                    + tmp1 * 2.0e+00 * njac(4,4,k)
			
 
				+                    + tmp1 * 2.0e+00 * dz4;
			
 
				+               lhsb(4,5,k) = tmp1 * 2.0e+00 * njac(4,5,k);
			
 
				+
			
 
				+               lhsb(5,1,k) = tmp1 * 2.0e+00 * njac(5,1,k);
			
 
				+               lhsb(5,2,k) = tmp1 * 2.0e+00 * njac(5,2,k);
			
 
				+               lhsb(5,3,k) = tmp1 * 2.0e+00 * njac(5,3,k);
			
 
				+               lhsb(5,4,k) = tmp1 * 2.0e+00 * njac(5,4,k);
			
 
				+               lhsb(5,5,k) = 1.0e+00
			
 
				+                    + tmp1 * 2.0e+00 * njac(5,5,k) 
			
 
				+                    + tmp1 * 2.0e+00 * dz5;
			
 
				+
			
 
				+               lhsc(1,1,i,j,k,c) =  tmp2 * fjac(1,1,k+1)
			
 
				+                    - tmp1 * njac(1,1,k+1)
			
 
				+                    - tmp1 * dz1;
			
 
				+               lhsc(1,2,i,j,k,c) =  tmp2 * fjac(1,2,k+1)
			
 
				+                    - tmp1 * njac(1,2,k+1);
			
 
				+               lhsc(1,3,i,j,k,c) =  tmp2 * fjac(1,3,k+1)
			
 
				+                    - tmp1 * njac(1,3,k+1);
			
 
				+               lhsc(1,4,i,j,k,c) =  tmp2 * fjac(1,4,k+1)
			
 
				+                    - tmp1 * njac(1,4,k+1);
			
 
				+               lhsc(1,5,i,j,k,c) =  tmp2 * fjac(1,5,k+1)
			
 
				+                    - tmp1 * njac(1,5,k+1);
			
 
				+
			
 
				+               lhsc(2,1,i,j,k,c) =  tmp2 * fjac(2,1,k+1)
			
 
				+                    - tmp1 * njac(2,1,k+1);
			
 
				+               lhsc(2,2,i,j,k,c) =  tmp2 * fjac(2,2,k+1)
			
 
				+                    - tmp1 * njac(2,2,k+1)
			
 
				+                    - tmp1 * dz2;
			
 
				+               lhsc(2,3,i,j,k,c) =  tmp2 * fjac(2,3,k+1)
			
 
				+                    - tmp1 * njac(2,3,k+1);
			
 
				+               lhsc(2,4,i,j,k,c) =  tmp2 * fjac(2,4,k+1)
			
 
				+                    - tmp1 * njac(2,4,k+1);
			
 
				+               lhsc(2,5,i,j,k,c) =  tmp2 * fjac(2,5,k+1)
			
 
				+                    - tmp1 * njac(2,5,k+1);
			
 
				+
			
 
				+               lhsc(3,1,i,j,k,c) =  tmp2 * fjac(3,1,k+1)
			
 
				+                    - tmp1 * njac(3,1,k+1);
			
 
				+               lhsc(3,2,i,j,k,c) =  tmp2 * fjac(3,2,k+1)
			
 
				+                    - tmp1 * njac(3,2,k+1);
			
 
				+               lhsc(3,3,i,j,k,c) =  tmp2 * fjac(3,3,k+1)
			
 
				+                    - tmp1 * njac(3,3,k+1)
			
 
				+                    - tmp1 * dz3;
			
 
				+               lhsc(3,4,i,j,k,c) =  tmp2 * fjac(3,4,k+1)
			
 
				+                    - tmp1 * njac(3,4,k+1);
			
 
				+               lhsc(3,5,i,j,k,c) =  tmp2 * fjac(3,5,k+1)
			
 
				+                    - tmp1 * njac(3,5,k+1);
			
 
				+
			
 
				+               lhsc(4,1,i,j,k,c) =  tmp2 * fjac(4,1,k+1)
			
 
				+                    - tmp1 * njac(4,1,k+1);
			
 
				+               lhsc(4,2,i,j,k,c) =  tmp2 * fjac(4,2,k+1)
			
 
				+                    - tmp1 * njac(4,2,k+1);
			
 
				+               lhsc(4,3,i,j,k,c) =  tmp2 * fjac(4,3,k+1)
			
 
				+                    - tmp1 * njac(4,3,k+1);
			
 
				+               lhsc(4,4,i,j,k,c) =  tmp2 * fjac(4,4,k+1)
			
 
				+                    - tmp1 * njac(4,4,k+1)
			
 
				+                    - tmp1 * dz4;
			
 
				+               lhsc(4,5,i,j,k,c) =  tmp2 * fjac(4,5,k+1)
			
 
				+                    - tmp1 * njac(4,5,k+1);
			
 
				+
			
 
				+               lhsc(5,1,i,j,k,c) =  tmp2 * fjac(5,1,k+1)
			
 
				+                    - tmp1 * njac(5,1,k+1);
			
 
				+               lhsc(5,2,i,j,k,c) =  tmp2 * fjac(5,2,k+1)
			
 
				+                    - tmp1 * njac(5,2,k+1);
			
 
				+               lhsc(5,3,i,j,k,c) =  tmp2 * fjac(5,3,k+1)
			
 
				+                    - tmp1 * njac(5,3,k+1);
			
 
				+               lhsc(5,4,i,j,k,c) =  tmp2 * fjac(5,4,k+1)
			
 
				+                    - tmp1 * njac(5,4,k+1);
			
 
				+               lhsc(5,5,i,j,k,c) =  tmp2 * fjac(5,5,k+1)
			
 
				+                    - tmp1 * njac(5,5,k+1)
			
 
				+                    - tmp1 * dz5;
			
 
				+
			
 
				+            }
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     outer most do loops - sweeping in i direction
			
 
				+//---------------------------------------------------------------------
			
 
				+            if (first == 1) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     multiply c(i,j,kstart) by b_inverse and copy back to c
			
 
				+//     multiply rhs(kstart) by b_inverse(kstart) and copy to rhs
			
 
				+//---------------------------------------------------------------------
			
 
				+               binvcrhs( &lhsb(1,1,kstart),
			
 
				+                              &lhsc(1,1,i,j,kstart,c),
			
 
				+                              &rhs(1,i,j,kstart,c) );
			
 
				+
			
 
				+            }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     begin inner most do loop
			
 
				+//     do all the elements of the cell unless last 
			
 
				+//---------------------------------------------------------------------
			
 
				+            for (k = kstart+first; k <= ksize-last; k++) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     subtract A*lhs_vector(k-1) from lhs_vector(k)
			
 
				+//     
			
 
				+//     rhs(k) = rhs(k) - A*rhs(k-1)
			
 
				+//---------------------------------------------------------------------
			
 
				+               matvec_sub(&lhsa(1,1,k),
			
 
				+                               &rhs(1,i,j,k-1,c),&rhs(1,i,j,k,c));
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     B(k) = B(k) - C(k-1)*A(k)
			
 
				+//     call matmul_sub(aa,i,j,k,c,cc,i,j,k-1,c,bb,i,j,k,c)
			
 
				+//---------------------------------------------------------------------
			
 
				+               matmul_sub(&lhsa(1,1,k),
			
 
				+                               &lhsc(1,1,i,j,k-1,c),
			
 
				+                               &lhsb(1,1,k));
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     multiply c(i,j,k) by b_inverse and copy back to c
			
 
				+//     multiply rhs(i,j,1) by b_inverse(i,j,1) and copy to rhs
			
 
				+//---------------------------------------------------------------------
			
 
				+               binvcrhs( &lhsb(1,1,k),
			
 
				+                              &lhsc(1,1,i,j,k,c),
			
 
				+                              &rhs(1,i,j,k,c) );
			
 
				+
			
 
				+            }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     Now finish up special cases for last cell
			
 
				+//---------------------------------------------------------------------
			
 
				+            if (last == 1) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     rhs(ksize) = rhs(ksize) - A*rhs(ksize-1)
			
 
				+//---------------------------------------------------------------------
			
 
				+               matvec_sub(&lhsa(1,1,ksize),
			
 
				+                               &rhs(1,i,j,ksize-1,c),&rhs(1,i,j,ksize,c));
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     B(ksize) = B(ksize) - C(ksize-1)*A(ksize)
			
 
				+//     call matmul_sub(aa,i,j,ksize,c,
			
 
				+//     $              cc,i,j,ksize-1,c,bb,i,j,ksize,c)
			
 
				+//---------------------------------------------------------------------
			
 
				+               matmul_sub(&lhsa(1,1,ksize),
			
 
				+                               &lhsc(1,1,i,j,ksize-1,c),
			
 
				+                               &lhsb(1,1,ksize));
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     multiply rhs(ksize) by b_inverse(ksize) and copy to rhs
			
 
				+//---------------------------------------------------------------------
			
 
				+               binvrhs( &lhsb(1,1,ksize),
			
 
				+                             &rhs(1,i,j,ksize,c) );
			
 
				+
			
 
				+            }
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+      
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/Makefile
+++ b/RCCE_V2.0/apps/NPB/BT/Makefile
@@ -0,0 +1,65 @@
 
				+SHELL=/bin/sh
			
 
				+BENCHMARK=bt
			
 
				+BENCHMARKU=BT
			
 
				+
			
 
				+PROGRAM  = $(BENCHMARK).$(CLASS).$(NPROCS)
			
 
				+
			
 
				+default:: ${PROGRAM}
			
 
				+
			
 
				+# This makes sure the configuration utility setparams 
			
 
				+# is up to date. 
			
 
				+# Note that this must be run every time, which is why the
			
 
				+# target does not exist and is not created. 
			
 
				+# If you create a file called "config" you will break things. 
			
 
				+config:
			
 
				+	cd ../sys; ${MAKE} all
			
 
				+	../sys/setparams ${BENCHMARK} ${NPROCS} ${CLASS}
			
 
				+
			
 
				+# Normally setparams updates npbparams.h only if the settings (CLASS/NPROCS)
			
 
				+# have changed. However, we also want to update if the compile options
			
 
				+# may have changed (set in ../config/make.def). 
			
 
				+npbparams.h: ../config/make.def
			
 
				+	@ echo make.def modified. Rebuilding npbparams.h just in case
			
 
				+	rm -f npbparams.h
			
 
				+	../sys/setparams ${BENCHMARK} ${NPROCS} ${CLASS}
			
 
				+
			
 
				+# So that "make benchmark-name" works
			
 
				+${BENCHMARK}:  default
			
 
				+${BENCHMARKU}: default
			
 
				+
			
 
				+bt.o:             bt.c  header.h npbparams.h  mpinpb.h
			
 
				+make_set.o:       make_set.c  header.h npbparams.h  mpinpb.h
			
 
				+initialize.o:     initialize.c  header.h npbparams.h
			
 
				+exact_solution.o: exact_solution.c  header.h npbparams.h
			
 
				+exact_rhs.o:      exact_rhs.c  header.h npbparams.h
			
 
				+set_constants.o:  set_constants.c  header.h npbparams.h
			
 
				+adi.o:            adi.c  header.h npbparams.h
			
 
				+define.o:         define.c  header.h npbparams.h
			
 
				+copy_faces.o:     copy_faces.c  header.h npbparams.h  mpinpb.h
			
 
				+rhs.o:            rhs.c  header.h npbparams.h
			
 
				+x_solve.o:        x_solve.c  header.h work_lhs.h npbparams.h  mpinpb.h
			
 
				+y_solve.o:        y_solve.c  header.h work_lhs.h npbparams.h  mpinpb.h
			
 
				+z_solve.o:        z_solve.c  header.h work_lhs.h npbparams.h  mpinpb.h
			
 
				+solve_subs.o:     solve_subs.c  npbparams.h
			
 
				+add.o:            add.c  header.h npbparams.h
			
 
				+error.o:          error.c  header.h npbparams.h  mpinpb.h
			
 
				+verify.o:         verify.c  header.h npbparams.h  mpinpb.h
			
 
				+setup_mpi.o:      setup_mpi.c mpinpb.h npbparams.h 
			
 
				+
			
 
				+
			
 
				+OBJS = bt.o make_set.o initialize.o exact_solution.o \
			
 
				+       exact_rhs.o set_constants.o adi.o define.o copy_faces.o  \
			
 
				+       rhs.o x_solve.o y_solve.o z_solve.o add.o solve_subs.o   \
			
 
				+       error.o verify.o setup_mpi.o print_results.o timers.o $(ARCHIVE) 
			
 
				+
			
 
				+$(PROGRAM): ${OBJS} 
			
 
				+	${CCOMPILE} ${CFLAGS} -o ${PROGRAM} ${OBJS} 
			
 
				+# use line below for gcc, which does not link libm by default
			
 
				+#	${CCOMPILE} ${CFLAGS} -o ${PROGRAM} ${OBJS} -lm
			
 
				+
			
 
				+.c.o:
			
 
				+	${CCOMPILE} -c $(CFLAGS)  $<
			
 
				+
			
 
				+clean:
			
 
				+	- rm -f *.o *~ mputil*
			
 
				+	- rm -f  npbparams.h core
			
--- a/RCCE_V2.0/apps/NPB/BT/add.c
+++ b/RCCE_V2.0/apps/NPB/BT/add.c
@@ -0,0 +1,44 @@
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+#include "header.h"
			
 
				+
			
 
				+void  add() {
			
 
				+
			
 
				+//     add: for each cell c = 1..ncells, adds rhs(m,i,j,k,c) to u(m,i,j,k,c)
			
 
				+//     for m = 1..5 over the i/j/k ranges below. No arguments, no return value.
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     addition of update to the vector u
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int  c, i, j, k, m;   // c: cell; i, j, k: indices for dimensions 1, 2, 3; m: component 1..5
			
 
				+
			
 
				+      for (c = 1; c <= ncells; c++) {   // cells are numbered from 1
			
 
				+         for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {   // NOTE(review): start/end presumably trim boundary points - confirm in header.h
			
 
				+            for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     u(m,i,j,k,c) = u(m,i,j,k,c) + rhs(m,i,j,k,c);   // u += rhs, element by element
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
--- a/RCCE_V2.0/apps/NPB/BT/adi.c
+++ b/RCCE_V2.0/apps/NPB/BT/adi.c
@@ -0,0 +1,34 @@
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+#include "header.h"
			
 
				+#include "RCCE.h"
			
 
				+
			
 
				+void  adi() {
			
 
				+
			
 
				+//     adi: calls copy_faces, x_solve, y_solve, z_solve and add once each, in
			
 
				+//     that order. NOTE(review): presumably one ADI time step - confirm in bt.c.
			
 
				+
			
 
				+      copy_faces();
			
 
				+      x_solve();
			
 
				+      y_solve();
			
 
				+      z_solve();
			
 
				+      add();   // adds rhs into u (see add.c)
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/applu_macros.h
+++ b/RCCE_V2.0/apps/NPB/BT/applu_macros.h
@@ -0,0 +1,8 @@
 
				+/* PAD32byte(n): cacheline padded length of n bytes, i.e. n (n >= 0) rounded up to the next multiple of 32; n is evaluated more than once */
			
 
				+#define  PAD32byte(n) ((n)%32==0 ? (n) : (n) + 32 - (n)%32)
			
 
				+/* PAD32dbl(n): cacheline padded length of n doubles, i.e. n rounded up to a multiple of 32/sizeof(double); sizeof makes this a size_t expression; n is evaluated more than once */
			
 
				+#define  PAD32dbl(n)  ((n)%(32/sizeof(double))==0 ? (n) : (n) + (32/sizeof(double)) \
			
 
				+                      - (n)%(32/sizeof(double)))
			
 
				+
			
 
				+#define max(x,y)      ((x)>(y)? (x) : (y))   /* larger of x and y; the selected argument is evaluated twice */
			
 
				+#define min(x,y)      ((x)<(y)? (x) : (y))   /* smaller of x and y; the selected argument is evaluated twice */
			
--- a/RCCE_V2.0/apps/NPB/BT/applu_protos.h
+++ b/RCCE_V2.0/apps/NPB/BT/applu_protos.h
@@ -0,0 +1,38 @@
 
				+void blts(int);
			
 
				+void buts(int, double *);
			
 
				+void erhs();
			
 
				+void error();
			
 
				+void exact(int, int, int, double *);
			
 
				+void exchange_1(double *, int, int);
			
 
				+void exchange_3(double *, int);
			
 
				+void exchange_4(double *, double *, int, int, int, int);
			
 
				+void exchange_5(double *, int, int);
			
 
				+void exchange_6(double *, int, int);
			
 
				+void RCCE_allreduce_d(double *, double *, int, int);
			
 
				+void init_comm(int *, char ***);
			
 
				+void jacld(int);
			
 
				+void jacu(int);
			
 
				+void l2norm(int, int, int, double *, double *);
			
 
				+void neighbors();
			
 
				+void pintgr();
			
 
				+void print_results(char *, char *, int *,  int *, int *, int *,
			
 
				+                    int *, int *, double *, double *, char *,
			
 
				+                    int *, char *, char *, char *, char *, char *,
			
 
				+                    char *, char *, char *, char *);
			
 
				+void proc_grid();
			
 
				+void bcast_inputs();
			
 
				+void read_input();
			
 
				+void rhs();
			
 
				+void setbv();
			
 
				+void setcoeff();
			
 
				+void setiv();
			
 
				+void ssor(int);
			
 
				+void subdomain();
			
 
				+void timer_clear(int *);
			
 
				+void timer_start(int *);
			
 
				+void timer_stop(int *);
			
 
				+void verify(double *, double *, double *, char *);
			
 
				+int  nodedim();
			
 
				+double timer_read(int *);
			
 
				+double test_rsd();
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/applu_share.h
+++ b/RCCE_V2.0/apps/NPB/BT/applu_share.h
@@ -0,0 +1,60 @@
 
				+#include "npbparams.h"
			
 
				+#include "applu_protos.h"
			
 
				+#include "RCCE.h"
			
 
				+
			
 
				+extern double u[5*(isiz1+4)*(isiz2+4)*isiz3],
			
 
				+              rsd[5*(isiz1+4)*(isiz2+4)*isiz3],
			
 
				+              frct[5*(isiz1+4)*(isiz2+4)*isiz3],
			
 
				+              flux[5*(isiz1+2)*(isiz2+2)*isiz3];
			
 
				+extern double a[5*5*isiz1*isiz2],
			
 
				+              b[5*5*isiz1*isiz2],
			
 
				+              c[5*5*isiz1*isiz2],
			
 
				+              d[5*5*isiz1*isiz2];
			
 
				+
			
 
				+extern double dt, omega, tolrsd[5], rsdnm[5], errnm[5], frc, ttotal;
			
 
				+extern double tolrsd1_def, tolrsd2_def, tolrsd3_def, tolrsd4_def, tolrsd5_def,
			
 
				+              omega_default;
			
 
				+extern double ce[5*13];
			
 
				+
			
 
				+extern int ndim, id, num, xdim, ydim, row, col;
			
 
				+extern int ii1, ii2, ji1, ji2, ki1, ki2;
			
 
				+extern int itmax, invert; 
			
 
				+extern int ipr, ipr_default, inorm;
			
 
				+extern int north,south,east,west;
			
 
				+extern int nx0, ny0, nz0;
			
 
				+extern int nx, ny, nz;
			
 
				+extern int ist, iend, jst, jend, ipt, jpt;
			
 
				+extern int dp_type;
			
 
				+extern double tx1, ty1, tz1, 
			
 
				+              dx1, dy1, dz1, 
			
 
				+              tx2, ty2, tz2, 
			
 
				+              dx2, dy2, dz2, 
			
 
				+              tx3, ty3, tz3, 
			
 
				+              dx3, dy3, dz3, 
			
 
				+              dx4, dy4, dz4, 
			
 
				+              dx5, dy5, dz5, 
			
 
				+              dssp, c1,  c2,  
			
 
				+              c3,  c4,  c5;
			
 
				+extern double dxi, deta, dzeta;
			
 
				+extern double npmax, maxtime;
			
 
				+extern double *buf1_exch_1;
			
 
				+
			
 
				+#ifdef _OPENMP
			
 
				+#pragma omp threadprivate (nx, ny, nz, nx0, ny0, nz0, \
			
 
				+                     ipt, ist, iend, jpt, jst, jend, \
			
 
				+                     ii1, ii2, ji1, ji2, ki1, ki2, \
			
 
				+                     dxi, deta, dzeta, \
			
 
				+                     tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3)
			
 
				+#pragma omp threadprivate (dx1, dx2, dx3, dx4, dx5, \
			
 
				+                     dy1, dy2, dy3, dy4, dy5, \
			
 
				+                     dz1, dz2, dz3, dz4, dz5, \
			
 
				+                     dssp)
			
 
				+#pragma omp threadprivate(u, rsd, frct, flux)
			
 
				+#pragma omp threadprivate(ipr, inorm)
			
 
				+#pragma omp threadprivate(itmax, invert, \
			
 
				+                    dt, omega, tolrsd, rsdnm, errnm, frc, ttotal, \
			
 
				+                    a, b, c, d)
			
 
				+#pragma omp threadprivate(ce)
			
 
				+#pragma omp threadprivate (id, ndim, num, xdim, ydim, row, col, \
			
 
				+                     north,south,east,west, buf1_exch_1, npmax, maxtime)
			
 
				+#endif
			
--- a/RCCE_V2.0/apps/NPB/BT/bt.c
+++ b/RCCE_V2.0/apps/NPB/BT/bt.c
@@ -0,0 +1,216 @@
 
				+//-------------------------------------------------------------------------!
			
 
				+//                                                                         !
			
 
				+//        N  A  S     P A R A L L E L     B E N C H M A R K S  3.3         !
			
 
				+//                                                                         !
			
 
				+//                                   B T                                   !
			
 
				+//                                                                         !
			
 
				+//-------------------------------------------------------------------------!
			
 
				+//                                                                         !
			
 
				+//    This benchmark is part of the NAS Parallel Benchmark 3.3 suite.      !
			
 
				+//    It is described in NAS Technical Reports 95-020 and 02-007.          !
			
 
				+//                                                                         !
			
 
				+//    Permission to use, copy, distribute and modify this software         !
			
 
				+//    for any purpose with or without fee is hereby granted.  We           !
			
 
				+//    request, however, that all derived work reference the NAS            !
			
 
				+//    Parallel Benchmarks 3.3. This software is provided "as is"           !
			
 
				+//    without express or implied warranty.                                 !
			
 
				+//                                                                         !
			
 
				+//    Information on NPB 3.3, including the technical report, the          !
			
 
				+//    original specifications, source code, results and information        !
			
 
				+//    on how to submit new results, is available at:                       !
			
 
				+//                                                                         !
			
 
				+//           http://www.nas.nasa.gov/Software/NPB/                         !
			
 
				+//                                                                         !
			
 
				+//    Send comments or suggestions to  npb@nas.nasa.gov                    !
			
 
				+//                                                                         !
			
 
				+//          NAS Parallel Benchmarks Group                                  !
			
 
				+//          NASA Ames Research Center                                      !
			
 
				+//          Mail Stop: T27A-1                                              !
			
 
				+//          Moffett Field, CA   94035-1000                                 !
			
 
				+//                                                                         !
			
 
				+//          E-mail:  npb@nas.nasa.gov                                      !
			
 
				+//          Fax:     (650) 604-3957                                        !
			
 
				+//                                                                         !
			
 
				+//-------------------------------------------------------------------------!
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+//
			
 
				+// Authors: R. F. Van der Wijngaart
			
 
				+//          T. Harris
			
 
				+//          M. Yarrow
			
 
				+//
			
 
				+//---------------------------------------------------------------------
			
 
				+#include <stdio.h>
			
 
				+#include <string.h>
			
 
				+#include "RCCE.h"
			
 
				+#include "applu_macros.h"
			
 
				+#define G_MAIN
			
 
				+#include "header.h"
			
 
				+#include "mpinpb.h"
			
 
				+
			
 
				+#define BSIZE 132
			
 
				+void make_color(void);
			
 
				+void print_results(char*, char, int, int, int, int, int, int, double,
			
 
				+                   double, char*, int, char*, char*, char*, char*, 
			
 
				+                   char*, char*, char*, char*);
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//      program MPBT;
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
				+// Driver of the BT benchmark on top of RCCE.
				+//
				+// Parameters:
				+//   argc, argv  command line, forwarded by address to setup_mpi().
				+//
				+// Returns:
				+//   0 after a completed run, or when setup_mpi() reports a non-zero
				+//   status; 1 when a cell does not fit the compiled array sizes.
				+//   RCCE_finalize() is called before every return.
				+//
				+// Sequence: set the default problem parameters, build the cell
				+// decomposition (make_set / make_color), check it against
				+// IMAX/JMAX/KMAX, initialize, run one untimed adi() step,
				+// reinitialize, run niter timed steps, verify, reduce the elapsed
				+// time with RCCE_MAX onto root, and let root print the results.
				+//---------------------------------------------------------------------
				+int RCCE_APP(int argc, char **argv) {
				+
				+       int niter, step, c;
				+       int verified;
				+       char class;
				+       double navg, mflops, n3;
				+       double t, tmax;
				+
				+       if (setup_mpi(&argc, &argv)) {
				+          RCCE_finalize();
				+          return 0;
				+       }
				+
				+//---------------------------------------------------------------------
				+//      No input file is read: every node takes the compiled-in
				+//      defaults; only the root node prints the banner.
				+//---------------------------------------------------------------------
				+       if (node == root) {
				+          printf("\n\n NAS Parallel Benchmarks 3.3 -- BT Benchmark\n");
				+       }
				+
				+       niter = NITER_DEFAULT;
				+       dt    = dt_default;
				+       grid_points(1) = PROBLEM_SIZE;
				+       grid_points(2) = PROBLEM_SIZE;
				+       grid_points(3) = PROBLEM_SIZE;
				+
				+       if (node == root) {
				+          printf(" Size: %4dx%4dx%4d\n", 
				+                 grid_points(1), grid_points(2), grid_points(3));
				+          printf(" Iterations: %4d    dt: %11.7f\n", niter, dt);
				+          if (no_nodes != total_nodes)
				+              printf(" Total number of processes: %5d\n", total_nodes);
				+          if (no_nodes != MAXCELLS*MAXCELLS) 
				+              printf(" WARNING: compiled for %5d processes\n",
				+                     MAXCELLS*MAXCELLS);
				+          printf(" Number of active processes: %5d\n\n", no_nodes);
				+       }
				+
				+       make_set();
				+       make_color();
				+
				+//---------------------------------------------------------------------
				+//      A cell larger than the compiled array extents would make
				+//      the solver index outside its arrays, so stop here instead
				+//      of only printing the diagnostic and carrying on.
				+//---------------------------------------------------------------------
				+       for (c = 1; c <= MAXCELLS; c++) {
				+          if ( (cell_size(1,c) > IMAX) ||
				+               (cell_size(2,c) > JMAX) ||
				+               (cell_size(3,c) > KMAX) ) {
				+             printf(" %d %d %d %d %d\n", node, c, cell_size(1,c),
				+                     cell_size(2,c), cell_size(3,c));
				+             printf(" Problem size too big for compiled array sizes\n");
				+             RCCE_finalize();
				+             return 1;
				+          }
				+       }
				+
				+       set_constants();
				+
				+       initialize();
				+
				+       lhsinit();
				+
				+       exact_rhs();
				+
				+       compute_buffer_size(5);
				+
				+//---------------------------------------------------------------------
				+//      do one time step to touch all code, and reinitialize
				+//---------------------------------------------------------------------
				+       adi();
				+
				+       initialize();
				+
				+       timer_clear(2);
				+
				+//---------------------------------------------------------------------
				+//      Synchronize before placing time stamp
				+//---------------------------------------------------------------------
				+       RCCE_barrier(&RCCE_COMM_WORLD);
				+
				+       timer_clear(1);
				+       timer_start(1);
				+
				+       for (step = 1; step <= niter; step++) {
				+
				+          // progress line on the first, every 20th and the last step
				+          if (node == root) {
				+             if ((step%20) == 0 || step == niter ||
				+                 step == 1) {
				+                printf(" Time step %4d\n", step); fflush(stdout);
				+             }
				+          }
				+          adi();
				+       }
				+
				+       timer_stop(1);
				+       t = timer_read(1);
				+
				+       verify(niter, &class, &verified);
				+
				+       // tmax is only read on root, the target of the reduction
				+       RCCE_reduce((char*)(&t), (char*)(&tmax), 1, RCCE_DOUBLE, RCCE_MAX, root, RCCE_COMM_WORLD);
				+
				+       if( node == root ) {
				+          n3 = 1.0e0*grid_points(1)*grid_points(2)*grid_points(3);
				+          navg = (grid_points(1)+grid_points(2)+grid_points(3))/3.0;
				+          if( tmax != 0. ) {
				+             mflops = 1.0e-6*(double)(niter)*
				+               (3478.8*(double)n3-17655.7*navg*navg+28023.7*navg)
				+               / tmax;
				+          } else {
				+             // avoid dividing by a zero elapsed time
				+             mflops = 0.0;
				+          }
				+
				+          print_results("BT", class, grid_points[0], 
				+            grid_points[1], grid_points[2], niter, MAXCELLS*MAXCELLS, 
				+            total_nodes, tmax, mflops, "          floating point", 
				+            verified, NPBVERSION,COMPILETIME, CS1, CS2, CS3, CS4, CS5, 
				+            CS6);
				+       }
				+
				+       RCCE_finalize();
				+
				+       return 0;
				+
				+}
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/copy_faces.c
+++ b/RCCE_V2.0/apps/NPB/BT/copy_faces.c
@@ -0,0 +1,338 @@
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include "header.h"
			
 
				+#include "mpinpb.h"
			
 
				+
			
 
				+//---------------------------------------------------------------------
				+// Packs the five components of u over the index box
				+// [i_lo,i_hi] x [j_lo,j_hi] x [k_lo,k_hi] of cell c into out_buffer,
				+// writing consecutive slots from base+pos (m varies fastest, then
				+// i, j, k).  Returns the running offset after the last value.
				+//---------------------------------------------------------------------
				+static int pack_box(int c, int i_lo, int i_hi, int j_lo, int j_hi,
				+                    int k_lo, int k_hi, int base, int pos) {
				+      int i, j, k, m;
				+
				+      for (k = k_lo; k <= k_hi; k++) {
				+         for (j = j_lo; j <= j_hi; j++) {
				+            for (i = i_lo; i <= i_hi; i++) {
				+               for (m = 1; m <= 5; m++) {
				+                  out_buffer(base+pos) = u(m,i,j,k,c);
				+                  pos = pos + 1;
				+               }
				+            }
				+         }
				+      }
				+      return pos;
				+}
				+
				+//---------------------------------------------------------------------
				+// Inverse of pack_box: fills the same kind of index box of u in
				+// cell c from consecutive in_buffer slots starting at base+pos.
				+// Returns the running offset after the last value read.
				+//---------------------------------------------------------------------
				+static int unpack_box(int c, int i_lo, int i_hi, int j_lo, int j_hi,
				+                      int k_lo, int k_hi, int base, int pos) {
				+      int i, j, k, m;
				+
				+      for (k = k_lo; k <= k_hi; k++) {
				+         for (j = j_lo; j <= j_hi; j++) {
				+            for (i = i_lo; i <= i_hi; i++) {
				+               for (m = 1; m <= 5; m++) {
				+                  u(m,i,j,k,c) = in_buffer(base+pos);
				+                  pos = pos + 1;
				+               }
				+            }
				+         }
				+      }
				+      return pos;
				+}
				+
				+//---------------------------------------------------------------------
				+// Copies the two outermost layers of u on every interior face of
				+// each local cell into the overlap layers of the adjacent cells
				+// held by neighbouring processes, then calls compute_rhs().
				+// With a single node there is nothing to exchange and only
				+// compute_rhs() is called.
				+//
				+// Slot numbering used for all per-direction arrays:
				+//   0 east, 1 west, 2 north, 3 south, 4 top, 5 bottom
				+//---------------------------------------------------------------------
				+void copy_faces() {
				+
				+      int send_at[6], recv_at[6], face_len[6];
				+      int put[6] = {0, 0, 0, 0, 0, 0};   // values packed so far, per send slot
				+      int get[6] = {0, 0, 0, 0, 0, 0};   // values unpacked so far, per receive slot
				+      int c, n, phase, i_end, j_end, k_end;
				+
				+      // Exchange schedule in the order the messages are issued in
				+      // each phase: colour index tested, slot sent, slot received.
				+      const int color_of[6]  = { WESTDIR, EASTDIR, SOUTHDIR,
				+                                 NORTHDIR, BOTTOMDIR, TOPDIR };
				+      const int send_slot[6] = { 1, 0, 3, 2, 5, 4 };
				+      const int recv_slot[6] = { 0, 1, 2, 3, 4, 5 };
				+
				+//---------------------------------------------------------------------
				+//     exit immediately if there are no faces to be copied
				+//---------------------------------------------------------------------
				+      if (no_nodes == 1) {
				+         compute_rhs();
				+         return;
				+      }
				+
				+      send_at[0] = start_send_east;
				+      send_at[1] = start_send_west;
				+      send_at[2] = start_send_north;
				+      send_at[3] = start_send_south;
				+      send_at[4] = start_send_top;
				+      send_at[5] = start_send_bottom;
				+
				+      recv_at[0] = start_recv_east;
				+      recv_at[1] = start_recv_west;
				+      recv_at[2] = start_recv_north;
				+      recv_at[3] = start_recv_south;
				+      recv_at[4] = start_recv_top;
				+      recv_at[5] = start_recv_bottom;
				+
				+      face_len[0] = east_size;
				+      face_len[1] = west_size;
				+      face_len[2] = north_size;
				+      face_len[3] = south_size;
				+      face_len[4] = top_size;
				+      face_len[5] = bottom_size;
				+
				+//---------------------------------------------------------------------
				+//     pack: for every cell, copy the two layers next to each face
				+//     that is not on the grid boundary into that direction's
				+//     region of out_buffer
				+//---------------------------------------------------------------------
				+      for (c = 1; c <= ncells; c++) {
				+
				+         // last valid index of the cell in each direction
				+         i_end = cell_size(1,c)-1;
				+         j_end = cell_size(2,c)-1;
				+         k_end = cell_size(3,c)-1;
				+
				+         if (cell_coord(1,c) != ncells) {   // east (i-dir)
				+            put[0] = pack_box(c, i_end-1, i_end, 0, j_end, 0, k_end,
				+                              send_at[0], put[0]);
				+         }
				+         if (cell_coord(1,c) != 1) {        // west
				+            put[1] = pack_box(c, 0, 1, 0, j_end, 0, k_end,
				+                              send_at[1], put[1]);
				+         }
				+         if (cell_coord(2,c) != ncells) {   // north (j-dir)
				+            put[2] = pack_box(c, 0, i_end, j_end-1, j_end, 0, k_end,
				+                              send_at[2], put[2]);
				+         }
				+         if (cell_coord(2,c) != 1) {        // south
				+            put[3] = pack_box(c, 0, i_end, 0, 1, 0, k_end,
				+                              send_at[3], put[3]);
				+         }
				+         if (cell_coord(3,c) != ncells) {   // top (k-dir)
				+            put[4] = pack_box(c, 0, i_end, 0, j_end, k_end-1, k_end,
				+                              send_at[4], put[4]);
				+         }
				+         if (cell_coord(3,c) != 1) {        // bottom
				+            put[5] = pack_box(c, 0, i_end, 0, j_end, 0, 1,
				+                              send_at[5], put[5]);
				+         }
				+      }
				+
				+//---------------------------------------------------------------------
				+//     exchange: three phases; in each phase a direction sends
				+//     and/or receives when its colour equals the phase number.
				+//     Entries 0/1 of the schedule use axis 1, 2/3 axis 2, 4/5
				+//     axis 3.  Even entries (west, south, bottom) send to the
				+//     predecessor and receive from the successor; odd entries
				+//     (east, north, top) do the opposite.
				+//---------------------------------------------------------------------
				+      for (phase = 0; phase < 3; phase++) {
				+         for (n = 0; n < 6; n++) {
				+            const int axis = n/2 + 1;
				+            const int up   = n % 2;
				+
				+            if (send_color[color_of[n]]==phase) {
				+               RCCE_send((char*)(&out_buffer(send_at[send_slot[n]])),
				+                         face_len[send_slot[n]]*sizeof(double),
				+                         up ? successor(axis) : predecessor(axis));
				+            }
				+            if (recv_color[color_of[n]]==phase) {
				+               RCCE_recv((char*)(&in_buffer(recv_at[recv_slot[n]])),
				+                         face_len[recv_slot[n]]*sizeof(double),
				+                         up ? predecessor(axis) : successor(axis));
				+            }
				+         }
				+      }
				+
				+//---------------------------------------------------------------------
				+//     unpack the data that has just been received into the two
				+//     overlap layers just outside each interior face
				+//---------------------------------------------------------------------
				+      for (c = 1; c <= ncells; c++) {
				+
				+         i_end = cell_size(1,c)-1;
				+         j_end = cell_size(2,c)-1;
				+         k_end = cell_size(3,c)-1;
				+
				+         if (cell_coord(1,c) != 1) {        // layers i = -2,-1 from the west region
				+            get[1] = unpack_box(c, -2, -1, 0, j_end, 0, k_end,
				+                                recv_at[1], get[1]);
				+         }
				+         if (cell_coord(1,c) != ncells) {   // layers beyond the last i from the east region
				+            get[0] = unpack_box(c, i_end+1, i_end+2, 0, j_end, 0, k_end,
				+                                recv_at[0], get[0]);
				+         }
				+         if (cell_coord(2,c) != 1) {        // layers j = -2,-1 from the south region
				+            get[3] = unpack_box(c, 0, i_end, -2, -1, 0, k_end,
				+                                recv_at[3], get[3]);
				+         }
				+         if (cell_coord(2,c) != ncells) {   // layers beyond the last j from the north region
				+            get[2] = unpack_box(c, 0, i_end, j_end+1, j_end+2, 0, k_end,
				+                                recv_at[2], get[2]);
				+         }
				+         if (cell_coord(3,c) != 1) {        // layers k = -2,-1 from the bottom region
				+            get[5] = unpack_box(c, 0, i_end, 0, j_end, -2, -1,
				+                                recv_at[5], get[5]);
				+         }
				+         if (cell_coord(3,c) != ncells) {   // layers beyond the last k from the top region
				+            get[4] = unpack_box(c, 0, i_end, 0, j_end, k_end+1, k_end+2,
				+                                recv_at[4], get[4]);
				+         }
				+      }
				+
				+//---------------------------------------------------------------------
				+//     do the rest of the rhs that uses the copied face values
				+//---------------------------------------------------------------------
				+      compute_rhs();
				+
				+      return;
				+}
			
--- a/RCCE_V2.0/apps/NPB/BT/define.c
+++ b/RCCE_V2.0/apps/NPB/BT/define.c
@@ -0,0 +1,78 @@
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include "header.h"
			
 
				+
			
 
				+//---------------------------------------------------------------------
				+// Computes the size of each of the six face buffers and the start
				+// offset of each buffer region for sending and receiving.
				+//
				+//   dim   number of values exchanged per grid point
				+//
				+// Every face contributes two layers of points.  A face lying on
				+// the grid boundary (cell coordinate 1 or ncells in that
				+// direction) needs no buffer space.  Nothing is computed when
				+// ncells is 1.
				+//---------------------------------------------------------------------
				+void compute_buffer_size(int dim) {
				+
				+      int c, jk_face, ik_face, ij_face;
				+
				+      if (ncells == 1) return;
				+
				+      west_size   = 0;
				+      east_size   = 0;
				+      north_size  = 0;
				+      south_size  = 0;
				+      top_size    = 0;
				+      bottom_size = 0;
				+
				+      for (c = 1; c <= ncells; c++) {
				+
				+         // two layers of dim values on each face of cell c
				+         jk_face = cell_size(2,c) * cell_size(3,c) * dim * 2;
				+         ik_face = cell_size(1,c)*cell_size(3,c) * dim * 2;
				+         ij_face = cell_size(1,c) * cell_size(2,c) * dim * 2;
				+
				+         if (cell_coord(1,c)!=1)      west_size   += jk_face;
				+         if (cell_coord(1,c)!=ncells) east_size   += jk_face;
				+
				+         if (cell_coord(2,c)!=1)      south_size  += ik_face;
				+         if (cell_coord(2,c)!=ncells) north_size  += ik_face;
				+
				+         if (cell_coord(3,c)!=1)      bottom_size += ij_face;
				+         if (cell_coord(3,c)!=ncells) top_size    += ij_face;
				+      }
				+
				+      // regions are laid out back to back, starting at offset 1, in
				+      // the order west, east, south, north, bottom, top
				+      start_send_west   = 1;
				+      start_send_east   = start_send_west   + west_size;
				+      start_send_south  = start_send_east   + east_size;
				+      start_send_north  = start_send_south  + south_size;
				+      start_send_bottom = start_send_north  + north_size;
				+      start_send_top    = start_send_bottom + bottom_size;
				+
				+      // the receive side uses the same layout
				+      start_recv_west   = start_send_west;
				+      start_recv_east   = start_send_east;
				+      start_recv_south  = start_send_south;
				+      start_recv_north  = start_send_north;
				+      start_recv_bottom = start_send_bottom;
				+      start_recv_top    = start_send_top;
				+
				+      return;
				+}
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/error.c
+++ b/RCCE_V2.0/apps/NPB/BT/error.c
@@ -0,0 +1,121 @@
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include <math.h>
			
 
				+#include "header.h"
			
 
				+#include "mpinpb.h"
			
 
				+#include "applu_macros.h"
			
 
				+
			
 
				// 1-based accessors for the five-element arrays used in this file.
				// The argument is parenthesized so that an expression argument
				// (for example "i & 3" or a conditional expression) is evaluated as
				// a whole before 1 is subtracted from it.
				#define u_exact(m) u_exact[(m)-1]
				#define rms(m) rms[(m)-1]
				#define rms_work(m) rms_work[(m)-1]
			
 
				+
			
 
				+void error_norm(double rms[]) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     this function computes the norm of the difference between the
			
 
				+//     computed solution and the exact solution
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int c, i, j, k, m, ii, jj, kk, d, error;
			
 
				+      double xi, eta, zeta, u_exact[5], rms_work[5],
			
 
				+           add;
			
 
				+
			
 
				+      for (m = 1; m <= 5; m++) {
			
 
				+         rms_work(m) = 0.0e0;
			
 
				+      }
			
 
				+
			
 
				+      for (c = 1; c <= ncells; c++) {
			
 
				+         kk = 0;
			
 
				+         for (k = cell_low(3,c); k <= cell_high(3,c); k++) {
			
 
				+            zeta = (double)(k) * dnzm1;
			
 
				+            jj = 0;
			
 
				+            for (j = cell_low(2,c); j <= cell_high(2,c); j++) {
			
 
				+               eta = (double)(j) * dnym1;
			
 
				+               ii = 0;
			
 
				+               for (i = cell_low(1,c); i <= cell_high(1,c); i++) {
			
 
				+                  xi = (double)(i) * dnxm1;
			
 
				+                  exact_solution(xi, eta, zeta, u_exact);
			
 
				+
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     add = u(m,ii,jj,kk,c)-u_exact(m);
			
 
				+                     rms_work(m) = rms_work(m) + add*add;
			
 
				+                  }
			
 
				+                  ii = ii + 1;
			
 
				+               }
			
 
				+               jj = jj + 1;
			
 
				+            }
			
 
				+            kk = kk + 1;
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+      RCCE_allreduce((char*)rms_work, (char*)rms, 5, RCCE_DOUBLE, RCCE_SUM, RCCE_COMM_WORLD);
			
 
				+
			
 
				+      for (m = 1; m <= 5; m++) {
			
 
				+         for (d = 1; d <= 3; d++) {
			
 
				+            rms(m) = rms(m) / (double)(grid_points(d)-2);
			
 
				+         }
			
 
				+         rms(m) = sqrt(rms(m));
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+void rhs_norm(double rms[]) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int c, i, j, k, d, m, error;
			
 
				+      double rms_work[5], add;
			
 
				+
			
 
				+      for (m = 1; m <= 5; m++) {
			
 
				+         rms_work(m) = 0.0e0;
			
 
				+      }
			
 
				+
			
 
				+      for (c = 1; c <= ncells; c++) {
			
 
				+         for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {
			
 
				+            for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
			
 
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     add = rhs(m,i,j,k,c);
			
 
				+                     rms_work(m) = rms_work(m) + add*add;
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+      RCCE_allreduce((char*)rms_work, (char*)rms, 5, RCCE_DOUBLE, RCCE_SUM, RCCE_COMM_WORLD);
			
 
				+
			
 
				+      for (m = 1; m <= 5; m++) {
			
 
				+         for (d = 1; d <= 3; d++) {
			
 
				+            rms(m) = rms(m) / (double)(grid_points(d)-2);
			
 
				+         }
			
 
				+         rms(m) = sqrt(rms(m));
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/exact_rhs.c
+++ b/RCCE_V2.0/apps/NPB/BT/exact_rhs.c
@@ -0,0 +1,375 @@
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include "header.h"
			
 
				+
			
 
				+void exact_rhs() {
				+
				+//---------------------------------------------------------------------
				+//     exact_rhs: compute the right hand side (the forcing array) based
				+//     on the exact solution. For every cell c in 1..ncells it zeroes
				+//     forcing(m,i,j,k,c), accumulates xi-, eta- and zeta-direction terms
				+//     built from exact_solution() samples, and finally flips the sign.
				+//---------------------------------------------------------------------
				+
				+      double dtemp[5], xi, eta, zeta, dtpp;
				+      int          c, m, i, j, k, ip1, im1, jp1, 
				+           jm1, km1, kp1;
				+#define dtemp(m) dtemp[m-1]
				+//     dtemp(m) gives 1-based access to the local sample buffer dtemp.
				+//     ue, buf, cuf, q and forcing are not declared in this function.
				+//---------------------------------------------------------------------
				+//     loop over all cells owned by this node                   
				+//---------------------------------------------------------------------
				+      for (c = 1; c <= ncells; c++) {
				+
				+//---------------------------------------------------------------------
				+//     initialize: zero forcing over the full 0..cell_size-1 extent
				+//---------------------------------------------------------------------
				+         for (k = 0; k <= cell_size(3,c)-1; k++) {
				+            for (j = 0; j <= cell_size(2,c)-1; j++) {
				+               for (i = 0; i <= cell_size(1,c)-1; i++) {
				+                  for (m = 1; m <= 5; m++) {
				+                     forcing(m,i,j,k,c) = 0.0e0;
				+                  }
				+               }
				+            }
				+         }
				+
				+//---------------------------------------------------------------------
				+//     xi-direction flux differences (one i-line per (j,k) pair)
				+//---------------------------------------------------------------------
				+         for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {
				+            zeta = (double)(k+cell_low(3,c)) * dnzm1;
				+            for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
				+               eta = (double)(j+cell_low(2,c)) * dnym1;
				+               // Pass 1: sample the exact solution along this i-line into ue, buf, cuf, q.
				+               for (i = -2*(1-start(1,c)); i <= cell_size(1,c)+1-2*end(1,c); i++) {
				+                  xi = (double)(i+cell_low(1,c)) * dnxm1;
				+
				+                  exact_solution(xi, eta, zeta, dtemp);
				+                  for (m = 1; m <= 5; m++) {
				+                     ue(i,m) = dtemp(m);
				+                  }
				+
				+                  dtpp = 1.0e0 / dtemp(1);
				+                  // buf(i,2..5): components 2..5 of the sample divided by component 1
				+                  for (m = 2; m <= 5; m++) {
				+                     buf(i,m) = dtpp * dtemp(m);
				+                  }
				+                  // cuf: square of buf(i,2); buf(i,1): sum of the squares of buf(i,2..4)
				+                  cuf(i)   = buf(i,2) * buf(i,2);
				+                  buf(i,1) = cuf(i) + buf(i,3) * buf(i,3) + 
				+                       buf(i,4) * buf(i,4) ;
				+                  q(i) = 0.5e0*(buf(i,2)*ue(i,2) + buf(i,3)*ue(i,3) +
				+                       buf(i,4)*ue(i,4));
				+
				+               }
				+               // Pass 2: accumulate terms that use the samples at i-1, i and i+1.
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
				+                  im1 = i-1;
				+                  ip1 = i+1;
				+
				+                  forcing(1,i,j,k,c) = forcing(1,i,j,k,c) -
				+                       tx2*( ue(ip1,2)-ue(im1,2) )+
				+                       dx1tx1*(ue(ip1,1)-2.0e0*ue(i,1)+ue(im1,1));
				+
				+                  forcing(2,i,j,k,c) = forcing(2,i,j,k,c) - tx2 * (
				+                       (ue(ip1,2)*buf(ip1,2)+c2*(ue(ip1,5)-q(ip1)))-
				+                       (ue(im1,2)*buf(im1,2)+c2*(ue(im1,5)-q(im1))))+
				+                       xxcon1*(buf(ip1,2)-2.0e0*buf(i,2)+buf(im1,2))+
				+                       dx2tx1*( ue(ip1,2)-2.0e0* ue(i,2)+ue(im1,2));
				+
				+                  forcing(3,i,j,k,c) = forcing(3,i,j,k,c) - tx2 * (
				+                       ue(ip1,3)*buf(ip1,2)-ue(im1,3)*buf(im1,2))+
				+                       xxcon2*(buf(ip1,3)-2.0e0*buf(i,3)+buf(im1,3))+
				+                       dx3tx1*( ue(ip1,3)-2.0e0*ue(i,3) +ue(im1,3));
				+                  
				+                  forcing(4,i,j,k,c) = forcing(4,i,j,k,c) - tx2*(
				+                       ue(ip1,4)*buf(ip1,2)-ue(im1,4)*buf(im1,2))+
				+                       xxcon2*(buf(ip1,4)-2.0e0*buf(i,4)+buf(im1,4))+
				+                       dx4tx1*( ue(ip1,4)-2.0e0* ue(i,4)+ ue(im1,4));
				+
				+                  forcing(5,i,j,k,c) = forcing(5,i,j,k,c) - tx2*(
				+                       buf(ip1,2)*(c1*ue(ip1,5)-c2*q(ip1))-
				+                       buf(im1,2)*(c1*ue(im1,5)-c2*q(im1)))+
				+                       0.5e0*xxcon3*(buf(ip1,1)-2.0e0*buf(i,1)+
				+                       buf(im1,1))+
				+                       xxcon4*(cuf(ip1)-2.0e0*cuf(i)+cuf(im1))+
				+                       xxcon5*(buf(ip1,5)-2.0e0*buf(i,5)+buf(im1,5))+
				+                       dx5tx1*( ue(ip1,5)-2.0e0* ue(i,5)+ ue(im1,5));
				+               }
				+
				+//---------------------------------------------------------------------
				+//     Fourth-order dissipation                         
				+//---------------------------------------------------------------------
				+               if (start(1,c) > 0) {   // one-sided stencils at i = 1 and i = 2
				+                  for (m = 1; m <= 5; m++) {
				+                     i = 1;   // i is set by hand here; the loop further down re-initializes it
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
				+                          (5.0e0*ue(i,m) - 4.0e0*ue(i+1,m) +ue(i+2,m));
				+                     i = 2;
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
				+                          (-4.0e0*ue(i-1,m) + 6.0e0*ue(i,m) -
				+                          4.0e0*ue(i+1,m) +       ue(i+2,m));
				+                  }
				+               }
				+               // full five-point stencil i-2..i+2 on the remaining points
				+               for (i = start(1,c)*3; i <= cell_size(1,c)-3*end(1,c)-1; i++) {
				+                  for (m = 1; m <= 5; m++) {
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp*
				+                          (ue(i-2,m) - 4.0e0*ue(i-1,m) +
				+                          6.0e0*ue(i,m) - 4.0e0*ue(i+1,m) + ue(i+2,m));
				+                  }
				+               }
				+               // one-sided stencils at i = cell_size-3 and i = cell_size-2
				+               if (end(1,c) > 0) {
				+                  for (m = 1; m <= 5; m++) {
				+                     i = cell_size(1,c)-3;
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
				+                          (ue(i-2,m) - 4.0e0*ue(i-1,m) +
				+                          6.0e0*ue(i,m) - 4.0e0*ue(i+1,m));
				+                     i = cell_size(1,c)-2;
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
				+                          (ue(i-2,m) - 4.0e0*ue(i-1,m) + 5.0e0*ue(i,m));
				+                  }
				+               }
				+
				+            }
				+         }
				+
				+//---------------------------------------------------------------------
				+//     eta-direction flux differences (one j-line per (i,k) pair)
				+//---------------------------------------------------------------------
				+         for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {
				+            zeta = (double)(k+cell_low(3,c)) * dnzm1;
				+            for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
				+               xi = (double)(i+cell_low(1,c)) * dnxm1;
				+               // Pass 1: sample the exact solution along this j-line into ue, buf, cuf, q.
				+               for (j = -2*(1-start(2,c)); j <= cell_size(2,c)+1-2*end(2,c); j++) {
				+                  eta = (double)(j+cell_low(2,c)) * dnym1;
				+
				+                  exact_solution(xi, eta, zeta, dtemp);
				+                  for (m = 1; m <= 5; m++) {
				+                     ue(j,m) = dtemp(m);
				+                  }
				+                  
				+                  dtpp = 1.0e0/dtemp(1);
				+
				+                  for (m = 2; m <= 5; m++) {
				+                     buf(j,m) = dtpp * dtemp(m);
				+                  }
				+                  // here cuf is the square of buf(j,3)
				+                  cuf(j)   = buf(j,3) * buf(j,3);
				+                  buf(j,1) = cuf(j) + buf(j,2) * buf(j,2) + 
				+                       buf(j,4) * buf(j,4);
				+                  q(j) = 0.5e0*(buf(j,2)*ue(j,2) + buf(j,3)*ue(j,3) +
				+                       buf(j,4)*ue(j,4));
				+               }
				+               // Pass 2: accumulate terms that use the samples at j-1, j and j+1.
				+               for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
				+                  jm1 = j-1;
				+                  jp1 = j+1;
				+                  
				+                  forcing(1,i,j,k,c) = forcing(1,i,j,k,c) -
				+                       ty2*( ue(jp1,3)-ue(jm1,3) )+
				+                       dy1ty1*(ue(jp1,1)-2.0e0*ue(j,1)+ue(jm1,1));
				+
				+                  forcing(2,i,j,k,c) = forcing(2,i,j,k,c) - ty2*(
				+                       ue(jp1,2)*buf(jp1,3)-ue(jm1,2)*buf(jm1,3))+
				+                       yycon2*(buf(jp1,2)-2.0e0*buf(j,2)+buf(jm1,2))+
				+                       dy2ty1*( ue(jp1,2)-2.0* ue(j,2)+ ue(jm1,2));
				+
				+                  forcing(3,i,j,k,c) = forcing(3,i,j,k,c) - ty2*(
				+                       (ue(jp1,3)*buf(jp1,3)+c2*(ue(jp1,5)-q(jp1)))-
				+                       (ue(jm1,3)*buf(jm1,3)+c2*(ue(jm1,5)-q(jm1))))+
				+                       yycon1*(buf(jp1,3)-2.0e0*buf(j,3)+buf(jm1,3))+
				+                       dy3ty1*( ue(jp1,3)-2.0e0*ue(j,3) +ue(jm1,3));
				+
				+                  forcing(4,i,j,k,c) = forcing(4,i,j,k,c) - ty2*(
				+                       ue(jp1,4)*buf(jp1,3)-ue(jm1,4)*buf(jm1,3))+
				+                       yycon2*(buf(jp1,4)-2.0e0*buf(j,4)+buf(jm1,4))+
				+                       dy4ty1*( ue(jp1,4)-2.0e0*ue(j,4)+ ue(jm1,4));
				+
				+                  forcing(5,i,j,k,c) = forcing(5,i,j,k,c) - ty2*(
				+                       buf(jp1,3)*(c1*ue(jp1,5)-c2*q(jp1))-
				+                       buf(jm1,3)*(c1*ue(jm1,5)-c2*q(jm1)))+
				+                       0.5e0*yycon3*(buf(jp1,1)-2.0e0*buf(j,1)+
				+                       buf(jm1,1))+
				+                       yycon4*(cuf(jp1)-2.0e0*cuf(j)+cuf(jm1))+
				+                       yycon5*(buf(jp1,5)-2.0e0*buf(j,5)+buf(jm1,5))+
				+                       dy5ty1*(ue(jp1,5)-2.0e0*ue(j,5)+ue(jm1,5));
				+               }
				+
				+//---------------------------------------------------------------------
				+//     Fourth-order dissipation                      
				+//---------------------------------------------------------------------
				+               if (start(2,c) > 0) {   // one-sided stencils at j = 1 and j = 2
				+                  for (m = 1; m <= 5; m++) {
				+                     j = 1;
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
				+                          (5.0e0*ue(j,m) - 4.0e0*ue(j+1,m) +ue(j+2,m));
				+                     j = 2;
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
				+                          (-4.0e0*ue(j-1,m) + 6.0e0*ue(j,m) -
				+                          4.0e0*ue(j+1,m) +       ue(j+2,m));
				+                  }
				+               }
				+               // full five-point stencil j-2..j+2 on the remaining points
				+               for (j = start(2,c)*3; j <= cell_size(2,c)-3*end(2,c)-1; j++) {
				+                  for (m = 1; m <= 5; m++) {
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp*
				+                          (ue(j-2,m) - 4.0e0*ue(j-1,m) +
				+                          6.0e0*ue(j,m) - 4.0e0*ue(j+1,m) + ue(j+2,m));
				+                  }
				+               }
				+               // one-sided stencils at j = cell_size-3 and j = cell_size-2
				+               if (end(2,c) > 0) {
				+                  for (m = 1; m <= 5; m++) {
				+                     j = cell_size(2,c)-3;
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
				+                          (ue(j-2,m) - 4.0e0*ue(j-1,m) +
				+                          6.0e0*ue(j,m) - 4.0e0*ue(j+1,m));
				+                     j = cell_size(2,c)-2;
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
				+                          (ue(j-2,m) - 4.0e0*ue(j-1,m) + 5.0e0*ue(j,m));
				+
				+                  }
				+               }
				+
				+            }
				+         }
				+
				+//---------------------------------------------------------------------
				+//     zeta-direction flux differences (one k-line per (i,j) pair)
				+//---------------------------------------------------------------------
				+         for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
				+            eta = (double)(j+cell_low(2,c)) * dnym1;
				+            for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
				+               xi = (double)(i+cell_low(1,c)) * dnxm1;
				+               // Pass 1: sample the exact solution along this k-line into ue, buf, cuf, q.
				+               for (k = -2*(1-start(3,c)); k <= cell_size(3,c)+1-2*end(3,c); k++) {
				+                  zeta = (double)(k+cell_low(3,c)) * dnzm1;
				+
				+                  exact_solution(xi, eta, zeta, dtemp);
				+                  for (m = 1; m <= 5; m++) {
				+                     ue(k,m) = dtemp(m);
				+                  }
				+
				+                  dtpp = 1.0e0/dtemp(1);
				+
				+                  for (m = 2; m <= 5; m++) {
				+                     buf(k,m) = dtpp * dtemp(m);
				+                  }
				+                  // here cuf is the square of buf(k,4)
				+                  cuf(k)   = buf(k,4) * buf(k,4);
				+                  buf(k,1) = cuf(k) + buf(k,2) * buf(k,2) + 
				+                       buf(k,3) * buf(k,3);
				+                  q(k) = 0.5e0*(buf(k,2)*ue(k,2) + buf(k,3)*ue(k,3) +
				+                       buf(k,4)*ue(k,4));
				+               }
				+               // Pass 2: accumulate terms that use the samples at k-1, k and k+1.
				+               for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {
				+                  km1 = k-1;
				+                  kp1 = k+1;
				+                  
				+                  forcing(1,i,j,k,c) = forcing(1,i,j,k,c) -
				+                       tz2*( ue(kp1,4)-ue(km1,4) )+
				+                       dz1tz1*(ue(kp1,1)-2.0e0*ue(k,1)+ue(km1,1));
				+
				+                  forcing(2,i,j,k,c) = forcing(2,i,j,k,c) - tz2 * (
				+                       ue(kp1,2)*buf(kp1,4)-ue(km1,2)*buf(km1,4))+
				+                       zzcon2*(buf(kp1,2)-2.0e0*buf(k,2)+buf(km1,2))+
				+                       dz2tz1*( ue(kp1,2)-2.0e0* ue(k,2)+ ue(km1,2));
				+
				+                  forcing(3,i,j,k,c) = forcing(3,i,j,k,c) - tz2 * (
				+                       ue(kp1,3)*buf(kp1,4)-ue(km1,3)*buf(km1,4))+
				+                       zzcon2*(buf(kp1,3)-2.0e0*buf(k,3)+buf(km1,3))+
				+                       dz3tz1*(ue(kp1,3)-2.0e0*ue(k,3)+ue(km1,3));
				+
				+                  forcing(4,i,j,k,c) = forcing(4,i,j,k,c) - tz2 * (
				+                       (ue(kp1,4)*buf(kp1,4)+c2*(ue(kp1,5)-q(kp1)))-
				+                       (ue(km1,4)*buf(km1,4)+c2*(ue(km1,5)-q(km1))))+
				+                       zzcon1*(buf(kp1,4)-2.0e0*buf(k,4)+buf(km1,4))+
				+                       dz4tz1*( ue(kp1,4)-2.0e0*ue(k,4) +ue(km1,4));
				+
				+                  forcing(5,i,j,k,c) = forcing(5,i,j,k,c) - tz2 * (
				+                       buf(kp1,4)*(c1*ue(kp1,5)-c2*q(kp1))-
				+                       buf(km1,4)*(c1*ue(km1,5)-c2*q(km1)))+
				+                       0.5e0*zzcon3*(buf(kp1,1)-2.0e0*buf(k,1)
				+                       +buf(km1,1))+
				+                       zzcon4*(cuf(kp1)-2.0e0*cuf(k)+cuf(km1))+
				+                       zzcon5*(buf(kp1,5)-2.0e0*buf(k,5)+buf(km1,5))+
				+                       dz5tz1*( ue(kp1,5)-2.0e0*ue(k,5)+ ue(km1,5));
				+               }
				+
				+//---------------------------------------------------------------------
				+//     Fourth-order dissipation                        
				+//---------------------------------------------------------------------
				+               if (start(3,c) > 0) {   // one-sided stencils at k = 1 and k = 2
				+                  for (m = 1; m <= 5; m++) {
				+                     k = 1;
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
				+                          (5.0e0*ue(k,m) - 4.0e0*ue(k+1,m) +ue(k+2,m));
				+                     k = 2;
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
				+                          (-4.0e0*ue(k-1,m) + 6.0e0*ue(k,m) -
				+                          4.0e0*ue(k+1,m) +       ue(k+2,m));
				+                  }
				+               }
				+               // full five-point stencil k-2..k+2 on the remaining points
				+               for (k = start(3,c)*3; k <= cell_size(3,c)-3*end(3,c)-1; k++) {
				+                  for (m = 1; m <= 5; m++) {
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp*
				+                          (ue(k-2,m) - 4.0e0*ue(k-1,m) +
				+                          6.0e0*ue(k,m) - 4.0e0*ue(k+1,m) + ue(k+2,m));
				+                  }
				+               }
				+               // one-sided stencils at k = cell_size-3 and k = cell_size-2
				+               if (end(3,c) > 0) {
				+                  for (m = 1; m <= 5; m++) {
				+                     k = cell_size(3,c)-3;
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
				+                          (ue(k-2,m) - 4.0e0*ue(k-1,m) +
				+                          6.0e0*ue(k,m) - 4.0e0*ue(k+1,m));
				+                     k = cell_size(3,c)-2;
				+                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
				+                          (ue(k-2,m) - 4.0e0*ue(k-1,m) + 5.0e0*ue(k,m));
				+                  }
				+               }
				+
				+            }
				+         }
				+
				+//---------------------------------------------------------------------
				+//     now change the sign of the forcing function (start..cell_size-end-1 only)
				+//---------------------------------------------------------------------
				+         for (k = start(3,c); k <= cell_size(3,c)-end(3,c)-1; k++) {
				+            for (j = start(2,c); j <= cell_size(2,c)-end(2,c)-1; j++) {
				+               for (i = start(1,c); i <= cell_size(1,c)-end(1,c)-1; i++) {
				+                  for (m = 1; m <= 5; m++) {
				+                     forcing(m,i,j,k,c) = -1.e0 * forcing(m,i,j,k,c);
				+                  }
				+               }
				+            }
				+         }
				+
				+      }
				+
				+      return;
				+}
			
--- a/RCCE_V2.0/apps/NPB/BT/exact_solution.c
+++ b/RCCE_V2.0/apps/NPB/BT/exact_solution.c
@@ -0,0 +1,43 @@
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include "header.h"
			
 
				+
			
 
				void exact_solution(double xi,double eta,double zeta,double dtemp[]) {

				//---------------------------------------------------------------------
				//     Returns the exact solution at the point (xi, eta, zeta).
				//
				//     dtemp (output): five values, addressed as dtemp(1)..dtemp(5).
				//     Value number comp is ce(comp,1) plus one nested fourth-degree
				//     polynomial in each of xi, eta and zeta:
				//        xi   uses ce(comp,2), ce(comp,5), ce(comp,8), ce(comp,11)
				//        eta  uses ce(comp,3), ce(comp,6), ce(comp,9), ce(comp,12)
				//        zeta uses ce(comp,4), ce(comp,7), ce(comp,10), ce(comp,13)
				//     ce is not declared in this function.
				//---------------------------------------------------------------------

				    int comp;
				#define dtemp(m) dtemp[m-1]

				    comp = 1;
				    while (comp <= 5) {
				        dtemp(comp) =  ce(comp,1) +
				            xi*(ce(comp,2) + xi*(ce(comp,5) + xi*(ce(comp,8) + xi*ce(comp,11)))) +
				            eta*(ce(comp,3) + eta*(ce(comp,6) + eta*(ce(comp,9) + eta*ce(comp,12))))+
				            zeta*(ce(comp,4) + zeta*(ce(comp,7) + zeta*(ce(comp,10) +
				            zeta*ce(comp,13))));
				        comp++;
				    }
				}
			
 
				+
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/header.h
+++ b/RCCE_V2.0/apps/NPB/BT/header.h
@@ -0,0 +1,287 @@
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+//
			
 
				+//  header.h
			
 
				+//
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+#ifndef __HEADER_H
			
 
				+#define __HEADER_H
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+// The following include file is generated automatically by the
			
 
				+// "setparams" utility. It defines 
			
 
				+//      maxcells:      the square root of the maximum number of processors
			
 
				+//      problem_size:  12, 64, 102, 162 (for class T, A, B, C)
			
 
				+//      dt_default:    default time step for this problem size if no
			
 
				+//                     config file
			
 
				+//      niter_default: default number of iterations for this problem size
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+#include "npbparams.h"
			
 
				+#include "RCCE.h"
			
 
				+//we introduce the next definition to avoid confusing the compiler, which
			
 
				+//sometimes thinks the variable class is a reserved word
			
 
				+#define class _class_
			
 
				+#include "../common/common.h"
			
 
				+
			
 
				+#define AA 0
			
 
				+#define BB 1
			
 
				+#define CC 2
			
 
				+#define BLOCK_SIZE 5
			
 
				+
			
 
				+#define EAST   2000
			
 
				+#define WEST   3000
			
 
				+#define NORTH  4000
			
 
				+#define SOUTH  5000
			
 
				+#define BOTTOM 6000
			
 
				+#define TOP    7000
			
 
				+
			
 
				+#define WESTDIR   0
			
 
				+#define EASTDIR   1
			
 
				+#define SOUTHDIR  2
			
 
				+#define NORTHDIR  3
			
 
				+#define BOTTOMDIR 4
			
 
				+#define TOPDIR    5
			
 
				+
			
 
				+#define MAX_CELL_DIM ((PROBLEM_SIZE/MAXCELLS)+1)
			
 
				+#define IMAX MAX_CELL_DIM
			
 
				+#define JMAX MAX_CELL_DIM
			
 
				+#define KMAX MAX_CELL_DIM
			
 
				+
			
 
				+#define BUF_SIZE (MAX_CELL_DIM*MAX_CELL_DIM*(MAXCELLS-1)*60+1)
			
 
				+
			
 
				+#define SQR(x) ((x)*(x))   // square of x; the outer parentheses keep the product intact inside larger expressions such as a/SQR(b). x is evaluated twice, so pass side-effect-free arguments
			
 
				+
			
 
				+#define grid_points(m) grid_points[m-1]
			
 
				+#define ce(m,n) ce[(m-1)+5*(n-1)]
			
 
				+#define cell_coord(m,n) cell_coord[(m-1)+3*(n-1)]
			
 
				+#define cell_low(m,n) cell_low[(m-1)+3*(n-1)]
			
 
				+#define cell_high(m,n) cell_high[(m-1)+3*(n-1)]
			
 
				+#define cell_size(m,n) cell_size[(m-1)+3*(n-1)]
			
 
				+#define predecessor(m) predecessor[m-1]
			
 
				+#define slice(m,n) slice[(m-1)+3*(n-1)]
			
 
				+#define grid_size(m) grid_size[m-1]
			
 
				+#define successor(m) successor[m-1]
			
 
				+#define start(m,n) start[(m-1)+3*(n-1)]
			
 
				+#define end(m,n) end[(m-1)+3*(n-1)]
			
 
				+#define us(i,j,k,c) us[(i+1)+(IMAX+2)*((j+1)+(JMAX+2)*((k+1)+(KMAX+2)*(c-1)))]
			
 
				+#define vs(i,j,k,c) vs[(i+1)+(IMAX+2)*((j+1)+(JMAX+2)*((k+1)+(KMAX+2)*(c-1)))]
			
 
				+#define ws(i,j,k,c) ws[(i+1)+(IMAX+2)*((j+1)+(JMAX+2)*((k+1)+(KMAX+2)*(c-1)))]
			
 
				+#define qs(i,j,k,c) qs[(i+1)+(IMAX+2)*((j+1)+(JMAX+2)*((k+1)+(KMAX+2)*(c-1)))]
			
 
				+#define rho_i(i,j,k,c) rho_i[(i+1)+(IMAX+2)*((j+1)+(JMAX+2)*((k+1)+(KMAX+2)*(c-1)))]
			
 
				+#define square(i,j,k,c) square[(i+1)+(IMAX+2)*((j+1)+(JMAX+2)*((k+1)+(KMAX+2)*(c-1)))]
			
 
				+#define forcing(m,i,j,k,c) forcing[(m-1)+5*(i+IMAX*(j+JMAX*(k+KMAX*(c-1))))]
			
 
				+#define u(m,i,j,k,c) u[(m-1)+5*((i+2)+(IMAX+4)*((j+2)+(JMAX+4)*((k+2)+(KMAX+4)*(c-1))))]
			
 
				+#define rhs(m,i,j,k,c) rhs[(m-1)+5*((i+1)+(IMAX+1)*((j+1)+(JMAX+1)*((k+1)+(KMAX+1)*(c-1))))]
			
 
				+#define lhsc(m,n,i,j,k,c) lhsc[(m-1)+5*((n-1)+5*((i+1)+(IMAX+1)*((j+1)+(JMAX+1)*((k+1)+(KMAX+1)*(c-1)))))]
			
 
				+#define backsub_info(m,i,j,c) backsub_info[(m-1)+5*((i)+(IMAX+1)*((j)+(JMAX+1)*(c-1)))]
			
 
				+#define in_buffer(i) in_buffer[i-1]
			
 
				+#define out_buffer(i) out_buffer[i-1]
			
 
				+#define cv(m) cv[m+2]
			
 
				+#define rhon(m) rhon[m+2]
			
 
				+#define rhos(m) rhos[m+2]
			
 
				+#define rhoq(m) rhoq[m+2]
			
 
				+#define cuf(m) cuf[m+2]
			
 
				+#define q(m) q[m+2]
			
 
				+#define ue(m,n) ue[(m+2)+(MAX_CELL_DIM+4)*(n-1)]
			
 
				+#define buf(m,n) buf[(m+2)+(MAX_CELL_DIM+4)*(n-1)]
			
 
				+#define sum(m) sum[m-1]
			
 
				+#define xce_sub(m) xce_sub[m-1]
			
 
				+
			
 
				+
			
 
				+#ifdef G_MAIN
			
 
				+      int     ncells, grid_points[3];
			
 
				+      double  elapsed_time;
			
 
				+
			
 
				+      double  tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3, 
			
 
				+                        dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4, 
			
 
				+                        dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt, 
			
 
				+                        ce[5*13], dxmax, dymax, dzmax, xxcon1, xxcon2, 
			
 
				+                        xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1,
			
 
				+                        dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4,
			
 
				+                        yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1,
			
 
				+                        zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1, 
			
 
				+                        dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1, 
			
 
				+                        dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2, 
			
 
				+                        c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt,
			
 
				+                        dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1, 
			
 
				+                        c2dtty1, c2dttz1, comz1, comz4, comz5, comz6, 
			
 
				+                        c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16;
			
 
				+
			
 
				+      int     cell_coord[MAXCELLS*3], cell_low[MAXCELLS*3], 
			
 
				+              cell_high[MAXCELLS*3],  cell_size[MAXCELLS*3],
			
 
				+              predecessor[3],         slice[MAXCELLS*3],
			
 
				+              grid_size[3],           successor[3],
			
 
				+              start[MAXCELLS*3],      end[MAXCELLS*3];
			
 
				+
			
 
				+      double 
			
 
				+         us      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         vs      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         ws      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         qs      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         rho_i   [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         square  [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         forcing [5*IMAX*JMAX*KMAX*MAXCELLS],
			
 
				+         u       [5*(IMAX+4)*(JMAX+4)*(KMAX+4)*MAXCELLS],
			
 
				+         rhs     [5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS],
			
 
				+         lhsc    [5*5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS],
			
 
				+         backsub_info [5*(MAX_CELL_DIM+1)*(MAX_CELL_DIM+1)*MAXCELLS],
			
 
				+         in_buffer[BUF_SIZE], out_buffer[BUF_SIZE];
			
 
				+
			
 
				+      double cv[MAX_CELL_DIM+4],   rhon[MAX_CELL_DIM+4],
			
 
				+             rhos[MAX_CELL_DIM+4], rhoq[MAX_CELL_DIM+4],
			
 
				+             cuf[MAX_CELL_DIM+4],  q[MAX_CELL_DIM+4],
			
 
				+             ue[(MAX_CELL_DIM+4)*5], buf[(MAX_CELL_DIM+4)*5];
			
 
				+
			
 
				+      int  west_size, east_size, bottom_size, top_size,
			
 
				+               north_size, south_size, start_send_west, 
			
 
				+               start_send_east, start_send_south, start_send_north,
			
 
				+               start_send_bottom, start_send_top, start_recv_west,
			
 
				+               start_recv_east, start_recv_south, start_recv_north,
			
 
				+               start_recv_bottom, start_recv_top;
			
 
				+//
			
 
				+//     These are used by btio
			
 
				+//
			
 
				+      int collbuf_nodes, collbuf_size, iosize,
			
 
				+              idump, record_length,
			
 
				+              idump_sub, rd_interval;
			
 
				+      double sum[NITER_DEFAULT], xce_sub[5];
			
 
				+      long int iseek;
			
 
				+      int    send_color[6], recv_color[6];
			
 
				+#else
			
 
				+extern int     ncells, grid_points[3];
			
 
				+extern double  elapsed_time;
			
 
				+
			
 
				+extern double  tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3, 
			
 
				+                        dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4, 
			
 
				+                        dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt, 
			
 
				+                        ce[5*13], dxmax, dymax, dzmax, xxcon1, xxcon2, 
			
 
				+                        xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1,
			
 
				+                        dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4,
			
 
				+                        yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1,
			
 
				+                        zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1, 
			
 
				+                        dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1, 
			
 
				+                        dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2, 
			
 
				+                        c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt,
			
 
				+                        dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1, 
			
 
				+                        c2dtty1, c2dttz1, comz1, comz4, comz5, comz6, 
			
 
				+                        c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16;
			
 
				+
			
 
				+extern int    cell_coord[MAXCELLS*3], cell_low[MAXCELLS*3], 
			
 
				+              cell_high[MAXCELLS*3],  cell_size[MAXCELLS*3],
			
 
				+              predecessor[3],         slice[MAXCELLS*3],
			
 
				+              grid_size[3],           successor[3],
			
 
				+              start[MAXCELLS*3],      end[MAXCELLS*3];
			
 
				+
			
 
				+extern double 
			
 
				+         us      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         vs      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         ws      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         qs      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         rho_i   [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         square  [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
			
 
				+         forcing [5*IMAX*JMAX*KMAX*MAXCELLS],
			
 
				+         u       [5*(IMAX+4)*(JMAX+4)*(KMAX+4)*MAXCELLS],
			
 
				+         rhs     [5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS],
			
 
				+         lhsc    [5*5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS],
			
 
				+         backsub_info [5*(MAX_CELL_DIM+1)*(MAX_CELL_DIM+1)*MAXCELLS],
			
 
				+         in_buffer[BUF_SIZE], out_buffer[BUF_SIZE];
			
 
				+
			
 
				+extern double cv[MAX_CELL_DIM+4],   rhon[MAX_CELL_DIM+4],
			
 
				+             rhos[MAX_CELL_DIM+4], rhoq[MAX_CELL_DIM+4],
			
 
				+             cuf[MAX_CELL_DIM+4],  q[MAX_CELL_DIM+4],
			
 
				+             ue[(MAX_CELL_DIM+4)*5], buf[(MAX_CELL_DIM+4)*5];
			
 
				+
			
 
				+extern int  west_size, east_size, bottom_size, top_size,
			
 
				+               north_size, south_size, start_send_west, 
			
 
				+               start_send_east, start_send_south, start_send_north,
			
 
				+               start_send_bottom, start_send_top, start_recv_west,
			
 
				+               start_recv_east, start_recv_south, start_recv_north,
			
 
				+               start_recv_bottom, start_recv_top;
			
 
				+
			
 
				+//
			
 
				+//     These are used by btio
			
 
				+//
			
 
				+extern int collbuf_nodes, collbuf_size, iosize,
			
 
				+              idump, record_length,
			
 
				+              idump_sub, rd_interval;
			
 
				+extern double sum[NITER_DEFAULT], xce_sub[5];
			
 
				+extern long int iseek;
			
 
				+extern int    send_color[6], recv_color[6];
			
 
				+
			
 
				+#endif /*G_MAIN*/
			
 
				+
			
 
				+extern void matvec_sub(double ablock[], double avec[], double bvec[]);
			
 
				+extern void matmul_sub(double ablock[], double bblock[], double cblock[]);
			
 
				+extern void binvcrhs( double lhs[], double c[], double r[] );
			
 
				+extern void binvrhs( double lhs[], double r[] );
			
 
				+extern void exact_solution(double xi,double eta,double zeta,double dtemp[]);
			
 
				+
			
 
				+extern int setup_mpi(int *argc, char ***argv);
			
 
				+extern void make_set(void);
			
 
				+extern void set_constants(void);
			
 
				+extern void lhsinit(void);
			
 
				+extern void lhsabinit(double lhsa[], double lhsb[], int size);
			
 
				+extern void initialize(void);
			
 
				+extern void exact_rhs(void);
			
 
				+extern void compute_buffer_size(int c);
			
 
				+extern void adi(void);
			
 
				+extern void compute_rhs(void);
			
 
				+extern void copy_faces(void);
			
 
				+extern void x_solve(void);
			
 
				+extern void y_solve(void);
			
 
				+extern void z_solve(void);
			
 
				+extern void add(void);
			
 
				+extern void verify(int niter, char *class, int *verified);
			
 
				+extern void error_norm(double rms[]);
			
 
				+extern void rhs_norm(double rms[]);
			
 
				+
			
 
				+extern void setup_btio(void);
			
 
				+extern void output_timestep(void);
			
 
				+extern void btio_cleanup(void);
			
 
				+extern void btio_verify(int *verified);
			
 
				+extern void accumulate_norms(double xce[]);
			
 
				+extern void clear_timestep(void);
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+#ifdef _OPENMP
			
 
				+#pragma omp threadprivate (cell_coord, cell_low, cell_high,  cell_size)
			
 
				+#pragma omp threadprivate (predecessor, slice, grid_size, successor)
			
 
				+#pragma omp threadprivate (start, end)
			
 
				+
			
 
				+#pragma omp threadprivate (ncells, grid_points, elapsed_time)
			
 
				+#pragma omp threadprivate (tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3, \
			
 
				+                           dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4, \
			
 
				+                           dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt, \
			
 
				+                           ce, dxmax, dymax, dzmax, xxcon1, xxcon2, \
			
 
				+                           xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1, \
			
 
				+                           dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4, \
			
 
				+                           yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1, \
			
 
				+                           zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1, \
			
 
				+                           dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1, \
			
 
				+                           dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2, \
			
 
				+                           c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt, \
			
 
				+                           dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1, \
			
 
				+                           c2dtty1, c2dttz1, comz1, comz4, comz5, comz6, \
			
 
				+                           c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16)
			
 
				+
			
 
				+#pragma omp threadprivate (us, vs, ws, qs, rho_i, square, forcing, \
			
 
				+                           u, rhs, lhsc, backsub_info, in_buffer, out_buffer)
			
 
				+
			
 
				+#pragma omp threadprivate (cv, rhon, rhos, rhoq, cuf, q, ue, buf)
			
 
				+
			
 
				+#pragma omp threadprivate (west_size, east_size, bottom_size, top_size, \
			
 
				+                           north_size, south_size, start_send_west, \
			
 
				+                           start_send_east, start_send_south, start_send_north, \
			
 
				+                           start_send_bottom, start_send_top, start_recv_west, \
			
 
				+                           start_recv_east, start_recv_south, start_recv_north, \
			
 
				+                           start_recv_bottom, start_recv_top, send_color, recv_color)
			
 
				+//
			
 
				+//     These are used by btio
			
 
				+//
			
 
				+#pragma omp threadprivate (collbuf_nodes, collbuf_size, iosize, idump,\
			
 
				+                           record_length, idump_sub, rd_interval, \
			
 
				+                           sum, xce_sub, iseek)
			
 
				+#endif
			
--- a/RCCE_V2.0/apps/NPB/BT/initialize.c
+++ b/RCCE_V2.0/apps/NPB/BT/initialize.c
@@ -0,0 +1,321 @@
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include "header.h"
			
 
				+
			
 
				+void  initialize() {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     Three passes over the cells 1..ncells: (1) set u to 1.0 on the index range -1..IMAX/JMAX/KMAX, (2) store the interpolated values at every grid point of every cell, (3) overwrite the six outer faces with the exact solution
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     This subroutine initializes the field variable u using 
			
 
				+//     tri-linear transfinite interpolation of the boundary values     
			
 
				+//---------------------------------------------------------------------
			
 
				+      
			
 
				+      int c, i, j, k, m, ii, jj, kk, ix, iy, iz;
			
 
				+      double xi, eta, zeta, Pface[5*3*2], Pxi, Peta, 
			
 
				+           Pzeta, temp[5];
			
 
				+#define Pface(m,n,i) Pface[(m-1)+5*((n-1)+3*(i-1))]   // m: component 1..5; n: direction (1 = xi, 2 = eta, 3 = zeta); i: 1 = face at coordinate 0, 2 = face at coordinate 1
			
 
				+#define temp(m) temp[m-1]   // 1-based view of the 5-element scratch vector
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//  Later (in compute_rhs) we compute 1/u for every element. A few of 
			
 
				+//  the corner elements are not used, but it is convenient (and faster) 
			
 
				+//  to compute the whole thing with a simple loop. Make sure those 
			
 
				+//  values are nonzero by initializing the whole thing here. 
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (c = 1; c <= ncells; c++) {
			
 
				+         for (kk = -1; kk <= KMAX; kk++) {
			
 
				+            for (jj = -1; jj <= JMAX; jj++) {
			
 
				+               for (ii = -1; ii <= IMAX; ii++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     u(m, ii, jj, kk, c) = 1.0;
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+      }
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     first store the "interpolated" values everywhere on the grid; i, j, k are the grid indices taken from cell_low..cell_high, ii, jj, kk are the matching cell-local indices starting at 0
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (c = 1; c <= ncells; c++) {
			
 
				+         kk = 0;
			
 
				+         for (k = cell_low(3,c); k <= cell_high(3,c); k++) {
			
 
				+            zeta = (double)(k) * dnzm1;
			
 
				+            jj = 0;
			
 
				+            for (j = cell_low(2,c); j <= cell_high(2,c); j++) {
			
 
				+               eta = (double)(j) * dnym1;
			
 
				+               ii = 0;
			
 
				+               for (i = cell_low(1,c); i <= cell_high(1,c); i++) {
			
 
				+                  xi = (double)(i) * dnxm1;
			
 
				+                  
			
 
				+                  for (ix = 1; ix <= 2; ix++) {   // exact solution on the faces xi = 0 and xi = 1
			
 
				+                     exact_solution((double)(ix-1), eta, zeta, 
			
 
				+                          &Pface(1,1,ix));
			
 
				+                  }
			
 
				+
			
 
				+                  for (iy = 1; iy <= 2; iy++) {   // exact solution on the faces eta = 0 and eta = 1
			
 
				+                     exact_solution(xi, (double)(iy-1) , zeta, 
			
 
				+                          &Pface(1,2,iy));
			
 
				+                  }
			
 
				+
			
 
				+                  for (iz = 1; iz <= 2; iz++) {   // exact solution on the faces zeta = 0 and zeta = 1
			
 
				+                     exact_solution(xi, eta, (double)(iz-1),   
			
 
				+                          &Pface(1,3,iz));
			
 
				+                  }
			
 
				+
			
 
				+                  for (m = 1; m <= 5; m++) {   // linear blend between each pair of opposite faces, then combine the three blends
			
 
				+                     Pxi   = xi   * Pface(m,1,2) + 
			
 
				+                          (1.0e0-xi)   * Pface(m,1,1);
			
 
				+                     Peta  = eta  * Pface(m,2,2) + 
			
 
				+                          (1.0e0-eta)  * Pface(m,2,1);
			
 
				+                     Pzeta = zeta * Pface(m,3,2) + 
			
 
				+                          (1.0e0-zeta) * Pface(m,3,1);
			
 
				+                     
			
 
				+                     u(m,ii,jj,kk,c) = Pxi + Peta + Pzeta - 
			
 
				+                          Pxi*Peta - Pxi*Pzeta - Peta*Pzeta + 
			
 
				+                          Pxi*Peta*Pzeta;
			
 
				+
			
 
				+                  }
			
 
				+                  ii = ii + 1;
			
 
				+               }
			
 
				+               jj = jj + 1;
			
 
				+            }
			
 
				+            kk = kk+1;
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     now store the exact values on the boundaries; each face below overwrites the interpolated values in one cell chosen through slice(direction, 1) or slice(direction, ncells)
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     west face: xi = 0, local index ii = 0, cell slice(1,1)
			
 
				+//---------------------------------------------------------------------
			
 
				+      c = slice(1,1);
			
 
				+      ii = 0;
			
 
				+      xi = 0.0e0;
			
 
				+      kk = 0;
			
 
				+      for (k = cell_low(3,c); k <= cell_high(3,c); k++) {
			
 
				+         zeta = (double)(k) * dnzm1;
			
 
				+         jj = 0;
			
 
				+         for (j = cell_low(2,c); j <= cell_high(2,c); j++) {
			
 
				+            eta = (double)(j) * dnym1;
			
 
				+            exact_solution(xi, eta, zeta, temp);
			
 
				+            for (m = 1; m <= 5; m++) {
			
 
				+               u(m,ii,jj,kk,c) = temp(m);
			
 
				+            }
			
 
				+            jj = jj + 1;
			
 
				+         }
			
 
				+         kk = kk + 1;
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     east face: xi = 1, local index ii = cell_size(1,c)-1, cell slice(1,ncells)
			
 
				+//---------------------------------------------------------------------
			
 
				+      c  = slice(1,ncells);
			
 
				+      ii = cell_size(1,c)-1;
			
 
				+      xi = 1.0e0;
			
 
				+      kk = 0;
			
 
				+      for (k = cell_low(3,c); k <= cell_high(3,c); k++) {
			
 
				+         zeta = (double)(k) * dnzm1;
			
 
				+         jj = 0;
			
 
				+         for (j = cell_low(2,c); j <= cell_high(2,c); j++) {
			
 
				+            eta = (double)(j) * dnym1;
			
 
				+            exact_solution(xi, eta, zeta, temp);
			
 
				+            for (m = 1; m <= 5; m++) {
			
 
				+               u(m,ii,jj,kk,c) = temp(m);
			
 
				+            }
			
 
				+            jj = jj + 1;
			
 
				+         }
			
 
				+         kk = kk + 1;
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     south face: eta = 0, local index jj = 0, cell slice(2,1)
			
 
				+//---------------------------------------------------------------------
			
 
				+      c = slice(2,1);
			
 
				+      jj = 0;
			
 
				+      eta = 0.0e0;
			
 
				+      kk = 0;
			
 
				+      for (k = cell_low(3,c); k <= cell_high(3,c); k++) {
			
 
				+         zeta = (double)(k) * dnzm1;
			
 
				+         ii = 0;
			
 
				+         for (i = cell_low(1,c); i <= cell_high(1,c); i++) {
			
 
				+            xi = (double)(i) * dnxm1;
			
 
				+            exact_solution(xi, eta, zeta, temp);
			
 
				+            for (m = 1; m <= 5; m++) {
			
 
				+               u(m,ii,jj,kk,c) = temp(m);
			
 
				+            }
			
 
				+            ii = ii + 1;
			
 
				+         }
			
 
				+         kk = kk + 1;
			
 
				+      }
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     north face: eta = 1, local index jj = cell_size(2,c)-1, cell slice(2,ncells)
			
 
				+//---------------------------------------------------------------------
			
 
				+      c = slice(2,ncells);
			
 
				+      jj = cell_size(2,c)-1;
			
 
				+      eta = 1.0e0;
			
 
				+      kk = 0;
			
 
				+      for (k = cell_low(3,c); k <= cell_high(3,c); k++) {
			
 
				+         zeta = (double)(k) * dnzm1;
			
 
				+         ii = 0;
			
 
				+         for (i = cell_low(1,c); i <= cell_high(1,c); i++) {
			
 
				+            xi = (double)(i) * dnxm1;
			
 
				+            exact_solution(xi, eta, zeta, temp);
			
 
				+            for (m = 1; m <= 5; m++) {
			
 
				+               u(m,ii,jj,kk,c) = temp(m);
			
 
				+            }
			
 
				+            ii = ii + 1;
			
 
				+         }
			
 
				+         kk = kk + 1;
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     bottom face: zeta = 0, local index kk = 0, cell slice(3,1)
			
 
				+//---------------------------------------------------------------------
			
 
				+      c = slice(3,1);
			
 
				+      kk = 0;
			
 
				+      zeta = 0.0e0;
			
 
				+      jj = 0;
			
 
				+      for (j = cell_low(2,c); j <= cell_high(2,c); j++) {
			
 
				+         eta = (double)(j) * dnym1;
			
 
				+         ii = 0;
			
 
				+         for (i = cell_low(1,c); i <= cell_high(1,c); i++) {
			
 
				+            xi = (double)(i) *dnxm1;
			
 
				+            exact_solution(xi, eta, zeta, temp);
			
 
				+            for (m = 1; m <= 5; m++) {
			
 
				+               u(m,ii,jj,kk,c) = temp(m);
			
 
				+            }
			
 
				+            ii = ii + 1;
			
 
				+         }
			
 
				+         jj = jj + 1;
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     top face: zeta = 1, local index kk = cell_size(3,c)-1, cell slice(3,ncells)
			
 
				+//---------------------------------------------------------------------
			
 
				+      c = slice(3,ncells);
			
 
				+      kk = cell_size(3,c)-1;
			
 
				+      zeta = 1.0e0;
			
 
				+      jj = 0;
			
 
				+      for (j = cell_low(2,c); j <= cell_high(2,c); j++) {
			
 
				+         eta = (double)(j) * dnym1;
			
 
				+         ii = 0;
			
 
				+         for (i = cell_low(1,c); i <= cell_high(1,c); i++) {
			
 
				+            xi = (double)(i) * dnxm1;
			
 
				+            exact_solution(xi, eta, zeta, temp);
			
 
				+            for (m = 1; m <= 5; m++) {
			
 
				+               u(m,ii,jj,kk,c) = temp(m);
			
 
				+            }
			
 
				+            ii = ii + 1;
			
 
				+         }
			
 
				+         jj = jj + 1;
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+void lhsinit() {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     lhsinit: for every cell 1..ncells, sets the start/end flags per direction and clears the cell's lhsc blocks; takes no arguments and returns nothing
			
 
				+      
			
 
				+      int i, j, k, d, c, m, n;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     loop over all cells                                       
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (c = 1; c <= ncells; c++) {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     first, initialize the start and end arrays: start(d,c) is 1 when cell_coord(d,c) == 1 and end(d,c) is 1 when cell_coord(d,c) == ncells; both are 0 otherwise
			
 
				+//---------------------------------------------------------------------
			
 
				+         for (d = 1; d <= 3; d++) {
			
 
				+            if (cell_coord(d,c) == 1) {
			
 
				+               start(d,c) = 1;
			
 
				+            } else {
			
 
				+               start(d,c) = 0;
			
 
				+            }
			
 
				+            if (cell_coord(d,c) == ncells) {
			
 
				+               end(d,c) = 1;
			
 
				+            } else {
			
 
				+               end(d,c) = 0;
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     zero the 5x5 left hand side blocks lhsc(m,n,i,j,k,c) for 0 <= i,j,k <= cell_size-1; entries addressed with index -1 are not written by this loop
			
 
				+//---------------------------------------------------------------------
			
 
				+         for (k = 0; k <= cell_size(3,c)-1; k++) {
			
 
				+            for (j = 0; j <= cell_size(2,c)-1; j++) {
			
 
				+               for (i = 0; i <= cell_size(1,c)-1; i++) {
			
 
				+                  for (m = 1; m <= 5; m++) {
			
 
				+                     for (n = 1; n <= 5; n++) {
			
 
				+                        lhsc(m,n,i,j,k,c) = 0.0e0;
			
 
				+                     }
			
 
				+                  }
			
 
				+               }
			
 
				+            }
			
 
				+         }
			
 
				+
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+void lhsabinit(double lhsa[], double lhsb[], int size) {
			
 
				+
			
 
				+#define lhsa(m,n,i) lhsa[(m-1)+5*((n-1)+5*(i+1))]   // 5x5 block i starts at offset 25*(i+1), so block index -1 maps to the start of the array
			
 
				+#define lhsb(m,n,i) lhsb[(m-1)+5*((n-1)+5*(i+1))]   // same layout as lhsa
			
 
				+
			
 
				+      int i, m, n;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     for blocks i = 0..size (inclusive): zero every entry of lhsa and lhsb, then set all diagonal values of lhsb to 1. This is overkill, but convenient. Block -1 (the first 25 entries) is not written, and the highest index touched is 25*(size+2)-1
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (i = 0; i <= size; i++) {
			
 
				+         for (m = 1; m <= 5; m++) {
			
 
				+            for (n = 1; n <= 5; n++) {
			
 
				+               lhsa(m,n,i) = 0.0e0;
			
 
				+               lhsb(m,n,i) = 0.0e0;
			
 
				+            }
			
 
				+            lhsb(m,m,i) = 1.0e0;   // written after row m has been cleared, so the diagonal entry ends up 1
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/inputbt.data.sample
+++ b/RCCE_V2.0/apps/NPB/BT/inputbt.data.sample
@@ -0,0 +1,5 @@
 
				+200       number of time steps
			
 
				+0.0008d0  dt for class A = 0.0008d0. class B = 0.0003d0  class C = 0.0001d0
			
 
				+64 64 64
			
 
				+5 0        write interval (optional read interval) for BTIO
			
 
				+0 1000000  number of nodes in collective buffering and buffer size for BTIO
			
--- a/RCCE_V2.0/apps/NPB/BT/make_set.c
+++ b/RCCE_V2.0/apps/NPB/BT/make_set.c
@@ -0,0 +1,222 @@
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#include <stdio.h>
			
 
				+#include <stdlib.h>
			
 
				+#include <math.h>
			
 
				+#include "header.h"
			
 
				+#include "mpinpb.h"
			
 
				+
			
 
				+#define mod(p,q) ((p)%(q))  // C remainder: result is negative when p is negative, so callers add q to p first
			
 
				+#define max(x,y)      ((x)>(y)? (x) : (y))  // the selected argument is evaluated twice; pass no side effects
			
 
				+#define min(x,y)      ((x)<(y)? (x) : (y))  // the selected argument is evaluated twice; pass no side effects
			
 
				+
			
 
				+void make_set() {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     This function allocates space for a set of cells and fills the set
			
 
				+//     such that communication between cells on different nodes is only
			
 
				+//     nearest neighbor
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+
			
 
				+      int p, i, j, c, dir, size, excess, ierr,ierrcode;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     compute square root; add small number to allow for roundoff
			
 
				+//     (note: this is computed in setup_mpi.f also, but prefer to do
			
 
				+//     it twice because of some include file problems).
			
 
				+//---------------------------------------------------------------------
			
 
				+      ncells = (int)(sqrt((double)(no_nodes) + 0.00001e0));
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     this makes coding easier
			
 
				+//---------------------------------------------------------------------
			
 
				+      p = ncells;
			
 
				+      
			
 
				+//---------------------------------------------------------------------
			
 
				+//     determine the location of the cell at the bottom of the 3D 
			
 
				+//     array of cells
			
 
				+//---------------------------------------------------------------------
			
 
				+      cell_coord(1,1) = mod(node,p) ;
			
 
				+      cell_coord(2,1) = node/p ;
			
 
				+      cell_coord(3,1) = 0;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     set the cell_coords for cells in the rest of the z-layers; 
			
 
				+//     this comes down to a simple linear numbering in the z-direct-
			
 
				+//     ion, and to the doubly-cyclic numbering in the other dirs     
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (c = 2; c <= p; c++) {
			
 
				+         cell_coord(1,c) = mod(cell_coord(1,c-1)+1,p) ;
			
 
				+         cell_coord(2,c) = mod(cell_coord(2,c-1)-1+p,p) ;
			
 
				+         cell_coord(3,c) = c-1;
			
 
				+      }
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     offset all the coordinates by 1 to adjust for Fortran arrays
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (dir = 1; dir <= 3; dir++) {
			
 
				+         for (c = 1; c <= p; c++) {
			
 
				+            cell_coord(dir,c) = cell_coord(dir,c) + 1;
			
 
				+         }
			
 
				+      }
			
 
				+      
			
 
				+//---------------------------------------------------------------------
			
 
				+//     slice(dir,n) contains the sequence number of the cell that is in
			
 
				+//     coordinate plane n in the dir direction
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (dir = 1; dir <= 3; dir++) {
			
 
				+         for (c = 1; c <= p; c++) {
			
 
				+            slice(dir,cell_coord(dir,c)) = c;
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     fill the predecessor and successor entries, using the indices 
			
 
				+//     of the bottom cells (they are the same at each level of k 
			
 
				+//     anyway) acting as if full periodicity pertains; note that p is
			
 
				+//     added to those arguments to the mod functions that might
			
 
				+//     otherwise return wrong values when using the modulo function
			
 
				+//---------------------------------------------------------------------
			
 
				+      i = cell_coord(1,1)-1;
			
 
				+      j = cell_coord(2,1)-1;
			
 
				+
			
 
				+      predecessor(1) = mod(i-1+p,p) + p*j;
			
 
				+      predecessor(2) = i + p*mod(j-1+p,p);
			
 
				+      predecessor(3) = mod(i+1,p) + p*mod(j-1+p,p);
			
 
				+      successor(1)   = mod(i+1,p) + p*j;
			
 
				+      successor(2)   = i + p*mod(j+1,p);
			
 
				+      successor(3)   = mod(i-1+p,p) + p*mod(j+1,p);
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     now compute the sizes of the cells                                
			
 
				+//---------------------------------------------------------------------
			
 
				+      for (dir = 1; dir <= 3; dir++) {
			
 
				+//---------------------------------------------------------------------
			
 
				+//     set cell_coord range for each direction                           
			
 
				+//---------------------------------------------------------------------
			
 
				+         size   = grid_points(dir)/p;
			
 
				+         excess = mod(grid_points(dir),p);
			
 
				+         for (c = 1; c <= ncells; c++) {
			
 
				+            if (cell_coord(dir,c) <= excess) {
			
 
				+               cell_size(dir,c) = size+1;
			
 
				+               cell_low(dir,c) = (cell_coord(dir,c)-1)*(size+1);
			
 
				+               cell_high(dir,c) = cell_low(dir,c)+size;
			
 
				+            } else {
			
 
				+               cell_size(dir,c) = size;
			
 
				+               cell_low(dir,c)  = excess*(size+1)+
			
 
				+                    (cell_coord(dir,c)-excess-1)*size;
			
 
				+               cell_high(dir,c) = cell_low(dir,c)+size-1;
			
 
				+            }
			
 
				+            if (cell_size(dir, c) <= 2) {
			
 
				+               printf(" Error: Cell size too small. Min size is 3\n");
			
 
				+               ierrcode = 1;
			
 
				+               exit(1);
			
 
				+            }
			
 
				+         }
			
 
				+      }
			
 
				+
			
 
				+      return;
			
 
				+}
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+
			
 
				+void make_color() {
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     This function determines cycles in the communication graphs in
			
 
				+//     the six coordinate directions, and colors the ranks so they know
			
 
				+//     how to construct deadlock-free blocking communication schedules
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+      int p, i, j, dir, node_loc, comm_color, node_min, length, start_found;
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     compute square root; add small number to allow for roundoff
			
 
				+//     (note: this is computed in setup_mpi.f also, but prefer to do
			
 
				+//     it twice because of some include file problems).
			
 
				+//---------------------------------------------------------------------
			
 
				+      ncells = (int)(sqrt((double)(no_nodes) + 0.00001e0));
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//     this makes coding easier
			
 
				+//---------------------------------------------------------------------
			
 
				+      p = ncells;
			
 
				+
			
 
				+      for (dir = 0; dir<6; dir++) {
			
 
				+
			
 
				+        node_loc = node_min = node; length = 1; start_found = 0;
			
 
				+        while (!start_found) {
			
 
				+          i = mod(node_loc,p) ;
			
 
				+          j = node_loc/p ;
			
 
				+
			
 
				+          switch (dir) {
			
 
				+            case (WESTDIR):   node_loc = mod(i-1+p,p) + p*j;          break;
			
 
				+            case (EASTDIR):   node_loc = mod(i+1,p) + p*j;            break;
			
 
				+            case (SOUTHDIR):  node_loc = i + p*mod(j-1+p,p);          break;
			
 
				+            case (NORTHDIR):  node_loc = i + p*mod(j+1,p);            break;
			
 
				+            case (BOTTOMDIR): node_loc = mod(i+1,p) + p*mod(j-1+p,p); break;
			
 
				+            case (TOPDIR):    node_loc = mod(i-1+p,p) + p*mod(j+1,p); break;
			
 
				+          }
			
 
				+
			
 
				+          // the next block ensures that the node with the lowest rank
			
 
				+          // in this cycle is colored WHITE (=0), and that nodes an even
			
 
				+          // number of jumps removed from that lowest-ranked member
			
 
				+          // are also white. The others are RED (1).
			
 
				+          if (node_loc <= node_min) {
			
 
				+            node_min = node_loc;
			
 
				+            comm_color = 0;
			
 
				+          } else comm_color = !comm_color;
			
 
				+          if (node_loc == node) start_found = 1;
			
 
				+          else length++;
			
 
				+        }
			
 
				+        send_color[dir] = comm_color;
			
 
				+        recv_color[dir] = !send_color[dir];
			
 
				+        // if the number of nodes in this cycle is odd, we need to treat the 
			
 
				+        // last node before the "start" of the cycle differently
			
 
				+        if (length%2) {
			
 
				+          if (node == node_min) recv_color[dir] = 2;
			
 
				+          i = mod(node,p) ;
			
 
				+          j = node/p ;
			
 
				+          switch (dir) {
			
 
				+            case (WESTDIR):   node_loc = mod(i-1+p,p) + p*j;          break;
			
 
				+            case (EASTDIR):   node_loc = mod(i+1,p) + p*j;            break;
			
 
				+            case (SOUTHDIR):  node_loc = i + p*mod(j-1+p,p);          break;
			
 
				+            case (NORTHDIR):  node_loc = i + p*mod(j+1,p);            break;
			
 
				+            case (BOTTOMDIR): node_loc = mod(i+1,p) + p*mod(j-1+p,p); break;
			
 
				+            case (TOPDIR):    node_loc = mod(i-1+p,p) + p*mod(j+1,p); break;
			
 
				+          }      
			
 
				+          if (node_loc == node_min) send_color[dir] = 2;
			
 
				+        }
			
 
				+      }
			
 
				+     return;
			
 
				+}
			
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+//---------------------------------------------------------------------
			
 
				+
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/mpinpb.h
+++ b/RCCE_V2.0/apps/NPB/BT/mpinpb.h
@@ -0,0 +1,34 @@
 
				+
			
 
				+//---------------------------------------------------------------------
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+//---------------------------------------------------------------------
			
 
				+#ifndef __MPINPB_H  // include guard
			
 
				+#define __MPINPB_H
			
 
				+// Shared rank globals: defined where G_MAIN is set, declared extern everywhere else.
			
 
				+#ifdef G_MAIN  // defining translation unit: storage for the globals lives here
			
 
				+       int           node, no_nodes, total_nodes, root;  // node: index of the calling rank; no_nodes: rank count (presumably only the participating ranks -- confirm)
			
 
				+       int           active;  // NOTE(review): presumably flags whether this rank takes part in the run -- confirm
			
 
				+#else  // all other translation units see declarations only
			
 
				+extern int           node, no_nodes, total_nodes, root;
			
 
				+extern int           active;
			
 
				+
			
 
				+#endif  // G_MAIN
			
 
				+#ifdef _OPENMP  // OpenMP builds: every thread gets its own copy of these globals
			
 
				+#pragma omp threadprivate (node, no_nodes, total_nodes, root, active)
			
 
				+#endif  // _OPENMP
			
 
				+#endif  // __MPINPB_H
			
 
				+
			
--- a/RCCE_V2.0/apps/NPB/BT/print_results.c
+++ b/RCCE_V2.0/apps/NPB/BT/print_results.c
@@ -0,0 +1,104 @@
 
				+/*****************************************************************/
			
 
				+/******     C  _  P  R  I  N  T  _  R  E  S  U  L  T  S     ******/
			
 
				+/*****************************************************************/
			
 
				+// 
			
 
				+// Copyright 2010 Intel Corporation
			
 
				+// 
			
 
				+//    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+//    you may not use this file except in compliance with the License.
			
 
				+//    You may obtain a copy of the License at
			
 
				+// 
			
 
				+//        http://www.apache.org/licenses/LICENSE-2.0
			
 
				+// 
			
 
				+//    Unless required by applicable law or agreed to in writing, software
			
 
				+//    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+//    See the License for the specific language governing permissions and
			
 
				+//    limitations under the License.
			
 
				+// 
			
 
				+#include <stdlib.h>
			
 
				+#include <stdio.h>
			
 
				+#define class _class_
			
 
				+
			
 
				+void print_results( char   *name,
			
 
				+                      char   class,
			
 
				+                      int    n1, 
			
 
				+                      int    n2,
			
 
				+                      int    n3,
			
 
				+                      int    niter,
			
 
				+                      int    nprocs_compiled,
			
 
				+                      int    nprocs_total,
			
 
				+                      double t,
			
 
				+                      double mops,
			
 
				+		      char   *optype,
			
 
				+                      int    passed_verification,
			
 
				+                      char   *npbversion,
			
 
				+                      char   *compiletime,
			
 
				+                      char   *mpicc,
			
 
				+                      char   *clink,
			
 
				+                      char   *cmpi_lib,
			
 
				+                      char   *cmpi_inc,
			
 
				+                      char   *cflags,
			
 
				+                      char   *clinkflags )
			
 
				+{
			
 
				+    char *evalue="1000";
			
 
				+
			
 
				+    printf( "\n\n %s Benchmark Completed\n", name ); 
			
 
				+
			
 
				+    printf( " Class           =                        %c\n", class );
			
 
				+
			
 
				+    printf( " Size            =            %3dx %3dx %3d\n", n1,n2,n3 );
			
 
				+
			
 
				+    printf( " Iterations      =             %12d\n", niter );
			
 
				+ 
			
 
				+    printf( " Time in seconds =             %12.2f\n", t );
			
 
				+
			
 
				+    printf( " Total processes =             %12d\n", nprocs_total );
			
 
				+
			
 
				+    if ( nprocs_compiled != 0 )
			
 
				+        printf( " Compiled procs  =             %12d\n", nprocs_compiled );
			
 
				+
			
 
				+    printf( " Mop/s total     =             %12.2f\n", mops );
			
 
				+
			
 
				+    printf( " Mop/s/process   =             %12.2f\n", mops/((float) nprocs_total) );
			
 
				+
			
 
				+    printf( " Operation type  = %24s\n", optype);
			
 
				+
			
 
				+    if( passed_verification )
			
 
				+        printf( " Verification    =               SUCCESSFUL\n" );
			
 
				+    else
			
 
				+        printf( " Verification    =             UNSUCCESSFUL\n" );
			
 
				+
			
 
				+    printf( " Version         =             %12s\n", npbversion );
			
 
				+
			
 
				+    printf( " Compile date    =             %12s\n", compiletime );
			
 
				+
			
 
				+    printf( "\n Compile options:\n" );
			
 
				+
			
 
				+    printf( "    MPICC        = %s\n", mpicc );
			
 
				+
			
 
				+    printf( "    CLINK        = %s\n", clink );
			
 
				+
			
 
				+    printf( "    CMPI_LIB     = %s\n", cmpi_lib );
			
 
				+
			
 
				+    printf( "    CMPI_INC     = %s\n", cmpi_inc );
			
 
				+
			
 
				+    printf( "    CFLAGS       = %s\n", cflags );
			
 
				+
			
 
				+    printf( "    CLINKFLAGS   = %s\n", clinkflags );
			
 
				+#ifdef SMP
			
 
				+    evalue = getenv("MP_SET_NUMTHREADS");
			
 
				+    printf( "   MULTICPUS = %s\n", evalue );
			
 
				+#endif
			
 
				+
			
 
				+    printf( "\n\n" );
			
 
				+    printf( " Please send the results of this run to:\n\n" );
			
 
				+    printf( " NPB Development Team\n" );
			
 
				+    printf( " Internet: npb@nas.nasa.gov\n \n" );
			
 
				+    printf( " If email is not available, send this to:\n\n" );
			
 
				+    printf( " MS T27A-1\n" );
			
 
				+    printf( " NASA Ames Research Center\n" );
			
 
				+    printf( " Moffett Field, CA  94035-1000\n\n" );
			
 
				+    printf( " Fax: 650-604-3957\n\n" );
			
 
				+}
			
 
				+ 
			
--- a/RCCE_V2.0/apps/NPB/BT/rhs.c
+++ b/RCCE_V2.0/apps/NPB/BT/rhs.c
		`@@ -0,0 +1 @@`
		`+keep link hpl->XHPL intact, it is necessary to build LINPACK.`