#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ NBody Demonstrator implemented in OpenCL, rendering OpenGL By default, rendering in OpenGL is disabled. Add -g option to activate. Part of matrix programs from: https://forge.cbp.ens-lyon.fr/svn/bench4gpu/ CC BY-NC-SA 2011 : Emmanuel QUEMENER Cecill v2 : Emmanuel QUEMENER Thanks to Andreas Klockner for PyOpenCL: http://mathema.tician.de/software/pyopencl """ import getopt import sys import time import numpy as np import pyopencl as cl import pyopencl.array from numpy.random import randint as nprnd def DictionariesAPI(): Marsaglia = {"CONG": 0, "SHR3": 1, "MWC": 2, "KISS": 3} Computing = {"FP32": 0, "FP64": 1} Interaction = {"Force": 0, "Potential": 1} Artevasion = {"None": 0, "NegExp": 1, "CorRad": 2} return (Marsaglia, Computing, Interaction, Artevasion) BlobOpenCL = """ #define TFP32 0 #define TFP64 1 #define TFORCE 0 #define TPOTENTIAL 1 #define NONE 0 #define NEGEXP 1 #define CORRAD 2 #if TYPE == TFP32 #define MYFLOAT4 float4 #define MYFLOAT8 float8 #define MYFLOAT float #define DISTANCE fast_distance #else #define MYFLOAT4 double4 #define MYFLOAT8 double8 #define MYFLOAT double #define DISTANCE distance #if defined(cl_khr_fp64) // Khronos extension available? #pragma OPENCL EXTENSION cl_khr_fp64 : enable #endif #endif #define znew ((zmwc=36969*(zmwc&65535)+(zmwc>>16))<<16) #define wnew ((wmwc=18000*(wmwc&65535)+(wmwc>>16))&65535) #define MWC (znew+wnew) #define SHR3 (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5)) #define CONG (jcong=69069*jcong+1234567) #define KISS ((MWC^CONG)+SHR3) #define MWCfp (MYFLOAT)(MWC * 2.3283064365386963e-10f) #define KISSfp (MYFLOAT)(KISS * 2.3283064365386963e-10f) #define SHR3fp (MYFLOAT)(SHR3 * 2.3283064365386963e-10f) #define CONGfp (MYFLOAT)(CONG * 2.3283064365386963e-10f) #define PI (MYFLOAT)3.141592653589793238e0f #define SMALL_NUM (MYFLOAT)1.e-9f #define CoreRadius (MYFLOAT)(1.e0f) // Create my own Distance implementation: distance buggy on Oland AMD chipset MYFLOAT MyDistance(MYFLOAT4 n,MYFLOAT4 m) { private MYFLOAT x2,y2,z2; x2=n.s0-m.s0; x2*=x2; y2=n.s1-m.s1; y2*=y2; z2=n.s2-m.s2; z2*=z2; return(sqrt(x2+y2+z2)); } // Potential between 2 m,n bodies MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n) #if ARTEVASION == NEGEXP // Add exp(-r) to numerator to avoid divergence for low distances { MYFLOAT r=DISTANCE(n,m); return((-1.e0f+exp(-r))/r); } #elif ARTEVASION == CORRAD // Add Core Radius to avoid divergence for low distances { MYFLOAT r=DISTANCE(n,m); return(-1.e0f/sqrt(r*r+CoreRadius*CoreRadius)); } #else // Classical potential in 1/r { // return((MYFLOAT)(-1.e0f)/(MyDistance(m,n))); return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m))); } #endif // Interaction based of Force as gradient of Potential MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n) #if INTERACTION == TFORCE #if ARTEVASION == NEGEXP // Force gradient of potential, set as (1-exp(-r))/r { private MYFLOAT r=MyDistance(n,m); private MYFLOAT num=1.e0f+exp(-r)*(r-1.e0f); return((n-m)*num/(MYFLOAT)(r*r*r)); } #elif ARTEVASION == CORRAD // Force gradient of potential, (Core Radius) set as 1/sqrt(r**2+CoreRadius**2) { private MYFLOAT r=MyDistance(n,m); private MYFLOAT den=sqrt(r*r+CoreRadius*CoreRadius); return((n-m)/(MYFLOAT)(den*den*den)); } #else // Simplest implementation of force (equals to acceleration) // seems to bo bad (numerous artevasions) // MYFLOAT4 InteractionForce(MYFLOAT4 m,MYFLOAT4 n) { private MYFLOAT r=MyDistance(n,m); return((n-m)/(MYFLOAT)(r*r*r)); } #endif #else // Force definited as gradient of potential // Estimate potential and proximate potential to estimate force { // 1/1024 seems to be a good factor: larger one provides bad results private MYFLOAT epsilon=(MYFLOAT)(1.e0f/1024); private MYFLOAT4 er=normalize(n-m); private MYFLOAT4 dr=er*(MYFLOAT)epsilon; return(er/epsilon*(PairPotential(m,n)-PairPotential(m+dr,n))); } #endif MYFLOAT AtomicPotential(__global MYFLOAT4* clDataX,int gid) { private MYFLOAT potential=(MYFLOAT)0.e0f; private MYFLOAT4 x=clDataX[gid]; for (int i=0;iRadius) { Position=(MYFLOAT4)((MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,0.e0f); Length=(MYFLOAT)length((MYFLOAT4)Position); } clDataX[gid]=Position; barrier(CLK_GLOBAL_MEM_FENCE); } __kernel void InBoxSplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT box, uint seed_z,uint seed_w) { int gid=get_global_id(0); uint zmwc=seed_z+gid; uint wmwc=seed_w-gid; private MYFLOAT Heat; for (int i=0;i Rotate around X axis") print("\t Rotate around Y axis") print("\t Rotate around Z axis") print("\t <-|+> Unzoom/Zoom") print("\t Toggle to display Positions or Velocities") print("\t Quit\n") wall_time_start = time.time() Durations = np.array([], dtype=MyFloat) print("Starting!") if OpenGL: import OpenGL.GL as gl import OpenGL.GLUT as glut global ViewRX, ViewRY, ViewRZ Iterations = 0 ViewRX, ViewRY, ViewRZ = 0.0, 0.0, 0.0 # Launch OpenGL Loop glut.glutInit(sys.argv) glut.glutInitDisplayMode(glut.GLUT_DOUBLE | glut.GLUT_RGB) glut.glutSetOption(glut.GLUT_ACTION_ON_WINDOW_CLOSE, glut.GLUT_ACTION_CONTINUE_EXECUTION) glut.glutInitWindowSize(512, 512) glut.glutCreateWindow(b"NBodyGL") setup_viewport() glut.glutReshapeFunc(reshape) glut.glutDisplayFunc(display) glut.glutIdleFunc(display) # glutMouseFunc(mouse) glut.glutSpecialFunc(special) glut.glutKeyboardFunc(keyboard) glut.glutMainLoop() else: for iteration in range(Iterations): Elapsed = MainOpenCL(clDataX, clDataV, Step, Method) if Verbose: # print("Duration of #%s iteration: %s" % (iteration,Elapsed)) cl.enqueue_copy(queue, MyDataX, clDataX) print("Positions for #%s iteration: %s" % (iteration, MyDataX)) else: sys.stdout.write(".") sys.stdout.flush() Durations = np.append(Durations, Elapsed) print("\nEnding!") MyRoutines.CenterOfMass(queue, (1, 1), None, clDataX, clCoM, np.int32(Number)) CLLaunch = MyRoutines.Potential(queue, (Number, 1), None, clDataX, clPotential) CLLaunch = MyRoutines.Kinetic(queue, (Number, 1), None, clDataV, clKinetic) CLLaunch.wait() cl.enqueue_copy(queue, MyCoM, clCoM) cl.enqueue_copy(queue, MyPotential, clPotential) cl.enqueue_copy(queue, MyKinetic, clKinetic) print("\nCenter Of Mass estimated: (%s,%s,%s)" % (MyCoM[0], MyCoM[1], MyCoM[2])) print( "Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n" % ( np.sum(MyPotential) + 2.0 * np.sum(MyKinetic), np.sum(MyPotential), np.sum(MyKinetic), ) ) print( "Duration stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n\n\tVariability:\t%s\n" # noqa: E501 % ( Device, Iterations, np.mean(Durations), np.median(Durations), np.std(Durations), np.min(Durations), np.max(Durations), np.std(Durations) / np.median(Durations), ) ) # FPS: 1/Elapsed FPS = np.ones(len(Durations)) FPS /= Durations print( "FPS stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n" # noqa: E501 % ( Device, Iterations, np.mean(FPS), np.median(FPS), np.std(FPS), np.min(FPS), np.max(FPS), ) ) # Contraction of Square*Size*Hertz: Size*Size/Elapsed Squertz = np.ones(len(Durations)) Squertz *= Number * Number Squertz /= Durations print( "Squertz in log10 & complete stats on device %s with %s iterations :\n\tMean:\t%s\t%s\n\tMedian:\t%s\t%s\n\tStddev:\t%s\t%s\n\tMin:\t%s\t%s\n\tMax:\t%s\t%s\n" # noqa: E501 % ( Device, Iterations, np.log10(np.mean(Squertz)), np.mean(Squertz), np.log10(np.median(Squertz)), np.median(Squertz), np.log10(np.std(Squertz)), np.std(Squertz), np.log10(np.min(Squertz)), np.min(Squertz), np.log10(np.max(Squertz)), np.max(Squertz), ) ) clDataX.release() clDataV.release() clKinetic.release() clPotential.release()