#!/usr/bin/env python3 # # TrouNoir model using PyOpenCL or PyCUDA # # CC BY-NC-SA 2019 : <emmanuel.quemener@ens-lyon.fr> # # Part of matrix programs from: https://forge.cbp.ens-lyon.fr/svn/bench4gpu/ # # Thanks to Andreas Klockner for PyOpenCL and PyCUDA: # http://mathema.tician.de/software/pyopencl # # Original code programmed in Fortran 77 in mars 1994 # for Practical Work of Numerical Simulation # DEA (old Master2) in astrophysics and spatial techniques in Meudon # by Herve Aussel & Emmanuel Quemener # # Conversion in C done by Emmanuel Quemener in august 1997 # GPUfication in OpenCL under Python in july 2019 # GPUfication in CUDA under Python in august 2019 # # Thanks to : # # - Herve Aussel for his part of code of black body spectrum # - Didier Pelat for his help to perform this work # - Jean-Pierre Luminet for his article published in 1979 # - Numerical Recipies for Runge Kutta recipies # - Luc Blanchet for his disponibility about my questions in General Relativity # - Pierre Lena for his passion about science and vulgarisation # If crash on OpenCL Intel implementation, add following options and force #export PYOPENCL_COMPILER_OUTPUT=1 #export CL_CONFIG_USE_VECTORIZER=True #export CL_CONFIG_CPU_VECTORIZER_MODE=16 import pyopencl as cl import numpy import time,string from numpy.random import randint as nprnd import sys import getopt from socket import gethostname def DictionariesAPI(): PhysicsList={'Einstein':0,'Newton':1} return(PhysicsList) # # Blank space below to simplify debugging on OpenCL code # BlobOpenCL= """ #define PI (float)3.14159265359e0f #define nbr 256 #define EINSTEIN 0 #define NEWTON 1 #ifdef SETTRACKPOINTS #define TRACKPOINTS SETTRACKPOINTS #else #define TRACKPOINTS 2048 #endif float atanp(float x,float y) { float angle; angle=atan2(y,x); if (angle<0.e0f) { angle+=(float)2.e0f*PI; } return angle; } float f(float v) { return v; } #if PHYSICS == NEWTON float g(float u,float m,float b) { return (-u); } #else float g(float u,float m,float b) { return (3.e0f*m/b*pow(u,2)-u); } #endif void calcul(float *us,float *vs,float up,float vp, float h,float m,float b) { float c0,c1,c2,c3,d0,d1,d2,d3; c0=h*f(vp); c1=h*f(vp+c0/2.e0f); c2=h*f(vp+c1/2.e0f); c3=h*f(vp+c2); d0=h*g(up,m,b); d1=h*g(up+d0/2.e0f,m,b); d2=h*g(up+d1/2.e0f,m,b); d3=h*g(up+d2,m,b); *us=up+(c0+2.e0f*c1+2.e0f*c2+c3)/6.e0f; *vs=vp+(d0+2.e0f*d1+2.e0f*d2+d3)/6.e0f; } void rungekutta(float *ps,float *us,float *vs, float pp,float up,float vp, float h,float m,float b) { calcul(us,vs,up,vp,h,m,b); *ps=pp+h; } float decalage_spectral(float r,float b,float phi, float tho,float m) { return (sqrt(1-3*m/r)/(1+sqrt(m/pow(r,3))*b*sin(tho)*sin(phi))); } float spectre(float rf,int q,float b,float db, float h,float r,float m,float bss) { float flx; // flx=exp(q*log(r/m))*pow(rf,4)*b*db*h; flx=exp(q*log(r/m)+4.e0f*log(rf))*b*db*h; return(flx); } float spectre_cn(float rf32,float b32,float db32, float h32,float r32,float m32,float bss32) { #define MYFLOAT float MYFLOAT rf=(MYFLOAT)(rf32); MYFLOAT b=(MYFLOAT)(b32); MYFLOAT db=(MYFLOAT)(db32); MYFLOAT h=(MYFLOAT)(h32); MYFLOAT r=(MYFLOAT)(r32); MYFLOAT m=(MYFLOAT)(m32); MYFLOAT bss=(MYFLOAT)(bss32); MYFLOAT flx; MYFLOAT nu_rec,nu_em,qu,temp_em,flux_int; int fi,posfreq; #define planck 6.62e-34f #define k 1.38e-23f #define c2 9.e16f #define temp 3.e7f #define m_point 1.e0f #define lplanck (log(6.62e0f)-34.e0f*log(10.e0f)) #define lk (log(1.38e0f)-23.e0f*log(10.e0f)) #define lc2 (log(9.e0f)+16.e0f*log(10.e0f)) MYFLOAT v=1.e0f-3.e0f/r; qu=1.e0f/sqrt((1.e0f-3.e0f/r)*r)*(sqrt(r)-sqrt(6.e0f)+sqrt(3.e0f)/2.e0f*log((sqrt(r)+sqrt(3.e0f))/(sqrt(r)-sqrt(3.e0f))* 0.17157287525380988e0f )); temp_em=temp*sqrt(m)*exp(0.25e0f*log(m_point)-0.75e0f*log(r)-0.125e0f*log(v)+0.25e0f*log(fabs(qu))); flux_int=0.e0f; flx=0.e0f; for (fi=0;fi<nbr;fi++) { nu_em=bss*(MYFLOAT)fi/(MYFLOAT)nbr; nu_rec=nu_em*rf; posfreq=(int)(nu_rec*(MYFLOAT)nbr/bss); if ((posfreq>0)&&(posfreq<nbr)) { // Initial version // flux_int=2.*planck/c2*pow(nu_em,3)/(exp(planck*nu_em/(k*temp_em))-1.); // Version with log used //flux_int=2.*exp(lplanck-lc2+3.*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.); // flux_int*=pow(rf,3)*b*db*h; //flux_int*=exp(3.e0f*log(rf))*b*db*h; flux_int=2.e0f*exp(lplanck-lc2+3.e0f*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.e0f)*exp(3.e0f*log(rf))*b*db*h; flx+=flux_int; } } return((float)(flx)); } void impact(float phi,float r,float b,float tho,float m, float *zp,float *fp, int q,float db, float h,int raie) { float flx,rf,bss; rf=decalage_spectral(r,b,phi,tho,m); if (raie==0) { bss=1.e19f; flx=spectre_cn(rf,b,db,h,r,m,bss); } else { bss=2.e0f; flx=spectre(rf,q,b,db,h,r,m,bss); } *zp=1.e0f/rf; *fp=flx; } __kernel void EachPixel(__global float *zImage,__global float *fImage, float Mass,float InternalRadius, float ExternalRadius,float Angle, int Line) { uint xi=(uint)get_global_id(0); uint yi=(uint)get_global_id(1); uint sizex=(uint)get_global_size(0); uint sizey=(uint)get_global_size(1); // Perform trajectory for each pixel, exit on hit float m,rs,ri,re,tho; int q,raie; m=Mass; rs=2.e0f*m; ri=InternalRadius; re=ExternalRadius; tho=Angle; q=-2; raie=Line; float bmx,db,b,h; float rp0,rps; float phi,phd; uint nh=0; float zp=0.e0f,fp=0.e0f; // Autosize for image bmx=1.25e0f*re; h=4.e0f*PI/(float)TRACKPOINTS; // set origin as center of image float x=(float)xi-(float)(sizex/2)+(float)5.e-1f; float y=(float)yi-(float)(sizey/2)+(float)5.e-1f; // angle extracted from cylindric symmetry phi=atanp(x,y); phd=atanp(cos(phi)*sin(tho),cos(tho)); float up,vp,pp,us,vs,ps; // impact parameter b=sqrt(x*x+y*y)*(float)2.e0f/(float)sizex*bmx; // step of impact parameter; db=bmx/(float)(sizex); up=0.e0f; vp=1.e0f; pp=0.e0f; rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); rps=fabs(b/us); rp0=rps; int ExitOnImpact=0; do { nh++; pp=ps; up=us; vp=vs; rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); rps=fabs(b/us); ExitOnImpact = ((fmod(pp,PI)<fmod(phd,PI))&&(fmod(ps,PI)>fmod(phd,PI)))&&(rps>=ri)&&(rps<=re)?1:0; } while ((rps>=rs)&&(rps<=rp0)&&(ExitOnImpact==0)&&(nh<TRACKPOINTS)); if (ExitOnImpact==1) { impact(phi,rps,b,tho,m,&zp,&fp,q,db,h,raie); } else { zp=0.e0f; fp=0.e0f; } barrier(CLK_GLOBAL_MEM_FENCE); zImage[yi+sizex*xi]=(float)zp; fImage[yi+sizex*xi]=(float)fp; } __kernel void Pixel(__global float *zImage,__global float *fImage, __global float *Trajectories,__global int *IdLast, uint ImpactParameter, float Mass,float InternalRadius, float ExternalRadius,float Angle, int Line) { uint xi=(uint)get_global_id(0); uint yi=(uint)get_global_id(1); uint sizex=(uint)get_global_size(0); uint sizey=(uint)get_global_size(1); // Perform trajectory for each pixel float m,ri,re,tho; int q,raie; m=Mass; ri=InternalRadius; re=ExternalRadius; tho=Angle; q=-2; raie=Line; float bmx,db,b,h; float phi,phd,php,nr,r; float zp=0.e0f,fp=0.e0f; // Autosize for image, 25% greater than external radius bmx=1.25e0f*re; // Angular step of integration h=4.e0f*PI/(float)TRACKPOINTS; // Step of Impact Parameter db=bmx/(2.e0f*(float)ImpactParameter); // set origin as center of image float x=(float)xi-(float)(sizex/2)+(float)5.e-1f; float y=(float)yi-(float)(sizey/2)+(float)5.e-1f; // angle extracted from cylindric symmetry phi=atanp(x,y); phd=atanp(cos(phi)*sin(tho),cos(tho)); // Real Impact Parameter b=sqrt(x*x+y*y)*bmx/(float)ImpactParameter; // Integer Impact Parameter uint bi=(uint)sqrt(x*x+y*y); int HalfLap=0,ExitOnImpact=0,ni; if (bi<ImpactParameter) { do { php=phd+(float)HalfLap*PI; nr=php/h; ni=(int)nr; if (ni<IdLast[bi]) { r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni]; } else { r=Trajectories[bi*TRACKPOINTS+ni]; } if ((r<=re)&&(r>=ri)) { ExitOnImpact=1; impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie); } HalfLap++; } while ((HalfLap<=2)&&(ExitOnImpact==0)); } barrier(CLK_GLOBAL_MEM_FENCE); zImage[yi+sizex*xi]=zp; fImage[yi+sizex*xi]=fp; } __kernel void Circle(__global float *Trajectories,__global int *IdLast, __global float *zImage,__global float *fImage, float Mass,float InternalRadius, float ExternalRadius,float Angle, int Line) { // Integer Impact Parameter ID int bi=get_global_id(0); // Integer points on circle int i=get_global_id(1); // Integer Impact Parameter Size (half of image) int bmaxi=get_global_size(0); // Integer Points on circle int imx=get_global_size(1); // Perform trajectory for each pixel float m,ri,re,tho; int q,raie; m=Mass; ri=InternalRadius; re=ExternalRadius; tho=Angle; raie=Line; float bmx,db,b,h; float phi,phd; float zp=0.e0f,fp=0.e0f; // Autosize for image bmx=1.25e0f*re; // Angular step of integration h=4.e0f*PI/(float)TRACKPOINTS; // impact parameter b=(float)bi/(float)bmaxi*bmx; db=bmx/(2.e0f*(float)bmaxi); phi=2.e0f*PI/(float)imx*(float)i; phd=atanp(cos(phi)*sin(tho),cos(tho)); int yi=(int)((float)bi*sin(phi))+bmaxi; int xi=(int)((float)bi*cos(phi))+bmaxi; int HalfLap=0,ExitOnImpact=0,ni; float php,nr,r; do { php=phd+(float)HalfLap*PI; nr=php/h; ni=(int)nr; if (ni<IdLast[bi]) { r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni]; } else { r=Trajectories[bi*TRACKPOINTS+ni]; } if ((r<=re)&&(r>=ri)) { ExitOnImpact=1; impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie); } HalfLap++; } while ((HalfLap<=2)&&(ExitOnImpact==0)); zImage[yi+2*bmaxi*xi]=zp; fImage[yi+2*bmaxi*xi]=fp; barrier(CLK_GLOBAL_MEM_FENCE); } __kernel void Trajectory(__global float *Trajectories,__global int *IdLast, float Mass,float InternalRadius, float ExternalRadius,float Angle, int Line) { // Integer Impact Parameter ID int bi=get_global_id(0); // Integer Impact Parameter Size (half of image) int bmaxi=get_global_size(0); // Perform trajectory for each pixel float m,rs,re; m=Mass; rs=2.e0f*m; re=ExternalRadius; float bmx,b,h; int nh; // Autosize for image bmx=1.25e0f*re; // Angular step of integration h=4.e0f*PI/(float)TRACKPOINTS; // impact parameter b=(float)bi/(float)bmaxi*bmx; float up,vp,pp,us,vs,ps; up=0.e0f; vp=1.e0f; pp=0.e0f; nh=0; rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); // b versus us float bvus=fabs(b/us); float bvus0=bvus; Trajectories[bi*TRACKPOINTS+nh]=bvus; do { nh++; pp=ps; up=us; vp=vs; rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); bvus=fabs(b/us); Trajectories[bi*TRACKPOINTS+nh]=bvus; } while ((bvus>=rs)&&(bvus<=bvus0)); IdLast[bi]=nh; barrier(CLK_GLOBAL_MEM_FENCE); } __kernel void EachCircle(__global float *zImage,__global float *fImage, float Mass,float InternalRadius, float ExternalRadius,float Angle, int Line) { // Integer Impact Parameter ID uint bi=(uint)get_global_id(0); // Integer Impact Parameter Size (half of image) uint bmaxi=(uint)get_global_size(0); private float Trajectory[TRACKPOINTS]; float m,rs,ri,re,tho; int raie,q; m=Mass; rs=2.e0f*m; ri=InternalRadius; re=ExternalRadius; tho=Angle; q=-2; raie=Line; float bmx,db,b,h; uint nh; // Autosize for image bmx=1.25e0f*re; // Angular step of integration h=4.e0f*PI/(float)TRACKPOINTS; // impact parameter b=(float)bi/(float)bmaxi*bmx; db=bmx/(2.e0f*(float)bmaxi); float up,vp,pp,us,vs,ps; up=0.e0f; vp=1.e0f; pp=0.e0f; nh=0; rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); // b versus us float bvus=fabs(b/us); float bvus0=bvus; Trajectory[nh]=bvus; do { nh++; pp=ps; up=us; vp=vs; rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); bvus=(float)fabs(b/us); Trajectory[nh]=bvus; } while ((bvus>=rs)&&(bvus<=bvus0)); for (uint i=(uint)nh+1;i<TRACKPOINTS;i++) { Trajectory[i]=0.e0f; } uint imx=(uint)(16*bi); for (uint i=0;i<imx;i++) { float zp=0.e0f,fp=0.e0f; float phi=2.e0f*PI/(float)imx*(float)i; float phd=atanp(cos(phi)*sin(tho),cos(tho)); uint yi=(uint)((float)bi*sin(phi)+bmaxi); uint xi=(uint)((float)bi*cos(phi)+bmaxi); uint HalfLap=0,ExitOnImpact=0,ni; float php,nr,r; do { php=phd+(float)HalfLap*PI; nr=php/h; ni=(int)nr; if (ni<nh) { r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.e0f)+Trajectory[ni]; } else { r=Trajectory[ni]; } if ((r<=re)&&(r>=ri)) { ExitOnImpact=1; impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie); } HalfLap++; } while ((HalfLap<=2)&&(ExitOnImpact==0)); zImage[yi+2*bmaxi*xi]=zp; fImage[yi+2*bmaxi*xi]=fp; } barrier(CLK_GLOBAL_MEM_FENCE); } __kernel void Original(__global float *zImage,__global float *fImage, uint Size,float Mass,float InternalRadius, float ExternalRadius,float Angle, int Line) { // Integer Impact Parameter Size (half of image) uint bmaxi=(uint)Size; float Trajectory[TRACKPOINTS]; // Perform trajectory for each pixel float m,rs,ri,re,tho; int raie,q; m=Mass; rs=2.e0f*m; ri=InternalRadius; re=ExternalRadius; tho=Angle; q=-2; raie=Line; float bmx,db,b,h; uint nh; // Autosize for image bmx=1.25e0f*re; // Angular step of integration h=4.e0f*PI/(float)TRACKPOINTS; // Integer Impact Parameter ID for (int bi=0;bi<bmaxi;bi++) { // impact parameter b=(float)bi/(float)bmaxi*bmx; db=bmx/(2.e0f*(float)bmaxi); float up,vp,pp,us,vs,ps; up=0.e0f; vp=1.e0f; pp=0.e0f; nh=0; rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); // b versus us float bvus=fabs(b/us); float bvus0=bvus; Trajectory[nh]=bvus; do { nh++; pp=ps; up=us; vp=vs; rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); bvus=fabs(b/us); Trajectory[nh]=bvus; } while ((bvus>=rs)&&(bvus<=bvus0)); for (uint i=(uint)nh+1;i<TRACKPOINTS;i++) { Trajectory[i]=0.e0f; } int imx=(int)(16*bi); for (int i=0;i<imx;i++) { float zp=0.e0f,fp=0.e0f; float phi=2.e0f*PI/(float)imx*(float)i; float phd=atanp(cos(phi)*sin(tho),cos(tho)); uint yi=(uint)((float)bi*sin(phi)+bmaxi); uint xi=(uint)((float)bi*cos(phi)+bmaxi); uint HalfLap=0,ExitOnImpact=0,ni; float php,nr,r; do { php=phd+(float)HalfLap*PI; nr=php/h; ni=(int)nr; if (ni<nh) { r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.e0f)+Trajectory[ni]; } else { r=Trajectory[ni]; } if ((r<=re)&&(r>=ri)) { ExitOnImpact=1; impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie); } HalfLap++; } while ((HalfLap<=2)&&(ExitOnImpact==0)); zImage[yi+2*bmaxi*xi]=zp; fImage[yi+2*bmaxi*xi]=fp; } } barrier(CLK_GLOBAL_MEM_FENCE); } """ def KernelCodeCuda(): BlobCUDA= """ #define PI (float)3.14159265359 #define nbr 256 #define EINSTEIN 0 #define NEWTON 1 #ifdef SETTRACKPOINTS #define TRACKPOINTS SETTRACKPOINTS #else #define TRACKPOINTS #endif __device__ float nothing(float x) { return(x); } __device__ float atanp(float x,float y) { float angle; angle=atan2(y,x); if (angle<0.e0f) { angle+=(float)2.e0f*PI; } return(angle); } __device__ float f(float v) { return(v); } #if PHYSICS == NEWTON __device__ float g(float u,float m,float b) { return (-u); } #else __device__ float g(float u,float m,float b) { return (3.e0f*m/b*pow(u,2)-u); } #endif __device__ void calcul(float *us,float *vs,float up,float vp, float h,float m,float b) { float c0,c1,c2,c3,d0,d1,d2,d3; c0=h*f(vp); c1=h*f(vp+c0/2.); c2=h*f(vp+c1/2.); c3=h*f(vp+c2); d0=h*g(up,m,b); d1=h*g(up+d0/2.,m,b); d2=h*g(up+d1/2.,m,b); d3=h*g(up+d2,m,b); *us=up+(c0+2.*c1+2.*c2+c3)/6.; *vs=vp+(d0+2.*d1+2.*d2+d3)/6.; } __device__ void rungekutta(float *ps,float *us,float *vs, float pp,float up,float vp, float h,float m,float b) { calcul(us,vs,up,vp,h,m,b); *ps=pp+h; } __device__ float decalage_spectral(float r,float b,float phi, float tho,float m) { return (sqrt(1-3*m/r)/(1+sqrt(m/pow(r,3))*b*sin(tho)*sin(phi))); } __device__ float spectre(float rf,int q,float b,float db, float h,float r,float m,float bss) { float flx; // flx=exp(q*log(r/m))*pow(rf,4)*b*db*h; flx=exp(q*log(r/m)+4.*log(rf))*b*db*h; return(flx); } __device__ float spectre_cn(float rf32,float b32,float db32, float h32,float r32,float m32,float bss32) { #define MYFLOAT float MYFLOAT rf=(MYFLOAT)(rf32); MYFLOAT b=(MYFLOAT)(b32); MYFLOAT db=(MYFLOAT)(db32); MYFLOAT h=(MYFLOAT)(h32); MYFLOAT r=(MYFLOAT)(r32); MYFLOAT m=(MYFLOAT)(m32); MYFLOAT bss=(MYFLOAT)(bss32); MYFLOAT flx; MYFLOAT nu_rec,nu_em,qu,temp_em,flux_int; int fi,posfreq; #define planck 6.62e-34 #define k 1.38e-23 #define c2 9.e16 #define temp 3.e7 #define m_point 1. #define lplanck (log(6.62)-34.*log(10.)) #define lk (log(1.38)-23.*log(10.)) #define lc2 (log(9.)+16.*log(10.)) MYFLOAT v=1.-3./r; qu=1./sqrt((1.-3./r)*r)*(sqrt(r)-sqrt(6.)+sqrt(3.)/2.*log((sqrt(r)+sqrt(3.))/(sqrt(r)-sqrt(3.))* 0.17157287525380988 )); temp_em=temp*sqrt(m)*exp(0.25*log(m_point)-0.75*log(r)-0.125*log(v)+0.25*log(fabs(qu))); flux_int=0.; flx=0.; for (fi=0;fi<nbr;fi++) { nu_em=bss*(MYFLOAT)fi/(MYFLOAT)nbr; nu_rec=nu_em*rf; posfreq=(int)(nu_rec*(MYFLOAT)nbr/bss); if ((posfreq>0)&&(posfreq<nbr)) { // Initial version // flux_int=2.*planck/c2*pow(nu_em,3)/(exp(planck*nu_em/(k*temp_em))-1.); // Version with log used //flux_int=2.*exp(lplanck-lc2+3.*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.); // flux_int*=pow(rf,3)*b*db*h; //flux_int*=exp(3.*log(rf))*b*db*h; flux_int=2.*exp(lplanck-lc2+3.*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.)*exp(3.*log(rf))*b*db*h; flx+=flux_int; } } return((float)(flx)); } __device__ void impact(float phi,float r,float b,float tho,float m, float *zp,float *fp, int q,float db, float h,int raie) { float flx,rf,bss; rf=decalage_spectral(r,b,phi,tho,m); if (raie==0) { bss=1.e19; flx=spectre_cn(rf,b,db,h,r,m,bss); } else { bss=2.; flx=spectre(rf,q,b,db,h,r,m,bss); } *zp=1./rf; *fp=flx; } __global__ void EachPixel(float *zImage,float *fImage, float Mass,float InternalRadius, float ExternalRadius,float Angle, int Line) { uint xi=(uint)(blockIdx.x*blockDim.x+threadIdx.x); uint yi=(uint)(blockIdx.y*blockDim.y+threadIdx.y); uint sizex=(uint)gridDim.x*blockDim.x; uint sizey=(uint)gridDim.y*blockDim.y; // Perform trajectory for each pixel, exit on hit float m,rs,ri,re,tho; int q,raie; m=Mass; rs=2.*m; ri=InternalRadius; re=ExternalRadius; tho=Angle; q=-2; raie=Line; float bmx,db,b,h; float rp0,rpp,rps; float phi,phd; int nh; float zp,fp; // Autosize for image bmx=1.25*re; b=0.; h=4.e0f*PI/(float)TRACKPOINTS; // set origin as center of image float x=(float)xi-(float)(sizex/2)+(float)5e-1f; float y=(float)yi-(float)(sizey/2)+(float)5e-1f; // angle extracted from cylindric symmetry phi=atanp(x,y); phd=atanp(cos(phi)*sin(tho),cos(tho)); float up,vp,pp,us,vs,ps; // impact parameter b=sqrt(x*x+y*y)*(float)2.e0f/(float)sizex*bmx; // step of impact parameter; // db=bmx/(float)(sizex/2); db=bmx/(float)(sizex); up=0.; vp=1.; pp=0.; nh=0; rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); rps=fabs(b/us); rp0=rps; int ExitOnImpact=0; do { nh++; pp=ps; up=us; vp=vs; rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); rpp=rps; rps=fabs(b/us); ExitOnImpact = ((fmod(pp,PI)<fmod(phd,PI))&&(fmod(ps,PI)>fmod(phd,PI)))&&(rps>ri)&&(rps<re)?1:0; } while ((rps>=rs)&&(rps<=rp0)&&(ExitOnImpact==0)); if (ExitOnImpact==1) { impact(phi,rpp,b,tho,m,&zp,&fp,q,db,h,raie); } else { zp=0.e0f; fp=0.e0f; } __syncthreads(); zImage[yi+sizex*xi]=(float)zp; fImage[yi+sizex*xi]=(float)fp; } __global__ void Pixel(float *zImage,float *fImage, float *Trajectories,int *IdLast, uint ImpactParameter, float Mass,float InternalRadius, float ExternalRadius,float Angle, int Line) { uint xi=(uint)(blockIdx.x*blockDim.x+threadIdx.x); uint yi=(uint)(blockIdx.y*blockDim.y+threadIdx.y); uint sizex=(uint)gridDim.x*blockDim.x; uint sizey=(uint)gridDim.y*blockDim.y; // Perform trajectory for each pixel float m,ri,re,tho; int q,raie; m=Mass; ri=InternalRadius; re=ExternalRadius; tho=Angle; q=-2; raie=Line; float bmx,db,b,h; float phi,phd,php,nr,r; float zp=0,fp=0; // Autosize for image, 25% greater than external radius bmx=1.25e0f*re; // Angular step of integration h=4.e0f*PI/(float)TRACKPOINTS; // Step of Impact Parameter db=bmx/(2.e0f*(float)ImpactParameter); // set origin as center of image float x=(float)xi-(float)(sizex/2)+(float)5e-1f; float y=(float)yi-(float)(sizey/2)+(float)5e-1f; // angle extracted from cylindric symmetry phi=atanp(x,y); phd=atanp(cos(phi)*sin(tho),cos(tho)); // Real Impact Parameter b=sqrt(x*x+y*y)*bmx/(float)ImpactParameter; // Integer Impact Parameter uint bi=(uint)sqrt(x*x+y*y); int HalfLap=0,ExitOnImpact=0,ni; if (bi<ImpactParameter) { do { php=phd+(float)HalfLap*PI; nr=php/h; ni=(int)nr; if (ni<IdLast[bi]) { r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni]; } else { r=Trajectories[bi*TRACKPOINTS+ni]; } if ((r<=re)&&(r>=ri)) { ExitOnImpact=1; impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie); } HalfLap++; } while ((HalfLap<=2)&&(ExitOnImpact==0)); } zImage[yi+sizex*xi]=zp; fImage[yi+sizex*xi]=fp; } __global__ void Circle(float *Trajectories,int *IdLast, float *zImage,float *fImage, float Mass,float InternalRadius, float ExternalRadius,float Angle, int Line) { // Integer Impact Parameter ID int bi=blockIdx.x*blockDim.x+threadIdx.x; // Integer points on circle int i=blockIdx.y*blockDim.y+threadIdx.y; // Integer Impact Parameter Size (half of image) int bmaxi=gridDim.x*blockDim.x; // Integer Points on circle int imx=gridDim.y*blockDim.y; // Perform trajectory for each pixel float m,ri,re,tho; int q,raie; m=Mass; ri=InternalRadius; re=ExternalRadius; tho=Angle; raie=Line; float bmx,db,b,h; float phi,phd; float zp=0,fp=0; // Autosize for image bmx=1.25e0f*re; // Angular step of integration h=4.e0f*PI/(float)TRACKPOINTS; // impact parameter b=(float)bi/(float)bmaxi*bmx; db=bmx/(2.e0f*(float)bmaxi); phi=2.e0f*PI/(float)imx*(float)i; phd=atanp(cos(phi)*sin(tho),cos(tho)); int yi=(int)((float)bi*sin(phi))+bmaxi; int xi=(int)((float)bi*cos(phi))+bmaxi; int HalfLap=0,ExitOnImpact=0,ni; float php,nr,r; do { php=phd+(float)HalfLap*PI; nr=php/h; ni=(int)nr; if (ni<IdLast[bi]) { r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni]; } else { r=Trajectories[bi*TRACKPOINTS+ni]; } if ((r<=re)&&(r>=ri)) { ExitOnImpact=1; impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie); } HalfLap++; } while ((HalfLap<=2)&&(ExitOnImpact==0)); zImage[yi+2*bmaxi*xi]=zp; fImage[yi+2*bmaxi*xi]=fp; } __global__ void Trajectory(float *Trajectories,int *IdLast, float Mass,float InternalRadius, float ExternalRadius,float Angle, int Line) { // Integer Impact Parameter ID int bi=blockIdx.x*blockDim.x+threadIdx.x; // Integer Impact Parameter Size (half of image) int bmaxi=gridDim.x*blockDim.x; // Perform trajectory for each pixel float m,rs,re; m=Mass; rs=2.e0f*m; re=ExternalRadius; float bmx,b,h; int nh; // Autosize for image bmx=1.25e0f*re; // Angular step of integration h=4.e0f*PI/(float)TRACKPOINTS; // impact parameter b=(float)bi/(float)bmaxi*bmx; float up,vp,pp,us,vs,ps; up=0.e0f; vp=1.e0f; pp=0.e0f; nh=0; rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); // b versus us float bvus=fabs(b/us); float bvus0=bvus; Trajectories[bi*TRACKPOINTS+nh]=bvus; do { nh++; pp=ps; up=us; vp=vs; rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); bvus=fabs(b/us); Trajectories[bi*TRACKPOINTS+nh]=bvus; } while ((bvus>=rs)&&(bvus<=bvus0)); IdLast[bi]=nh; } __global__ void EachCircle(float *zImage,float *fImage, float Mass,float InternalRadius, float ExternalRadius,float Angle, int Line) { // Integer Impact Parameter ID int bi=blockIdx.x*blockDim.x+threadIdx.x; // Integer Impact Parameter Size (half of image) int bmaxi=gridDim.x*blockDim.x; float Trajectory[2048]; // Perform trajectory for each pixel float m,rs,ri,re,tho; int raie,q; m=Mass; rs=2.*m; ri=InternalRadius; re=ExternalRadius; tho=Angle; q=-2; raie=Line; float bmx,db,b,h; int nh; // Autosize for image bmx=1.25e0f*re; // Angular step of integration h=4.e0f*PI/(float)TRACKPOINTS; // impact parameter b=(float)bi/(float)bmaxi*bmx; db=bmx/(2.e0f*(float)bmaxi); float up,vp,pp,us,vs,ps; up=0.; vp=1.; pp=0.; nh=0; rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); // b versus us float bvus=fabs(b/us); float bvus0=bvus; Trajectory[nh]=bvus; do { nh++; pp=ps; up=us; vp=vs; rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); bvus=fabs(b/us); Trajectory[nh]=bvus; } while ((bvus>=rs)&&(bvus<=bvus0)); int imx=(int)(16*bi); for (int i=0;i<imx;i++) { float zp=0,fp=0; float phi=2.*PI/(float)imx*(float)i; float phd=atanp(cos(phi)*sin(tho),cos(tho)); uint yi=(uint)((float)bi*sin(phi)+bmaxi); uint xi=(uint)((float)bi*cos(phi)+bmaxi); int HalfLap=0,ExitOnImpact=0,ni; float php,nr,r; do { php=phd+(float)HalfLap*PI; nr=php/h; ni=(int)nr; if (ni<nh) { r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.)+Trajectory[ni]; } else { r=Trajectory[ni]; } if ((r<=re)&&(r>=ri)) { ExitOnImpact=1; impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie); } HalfLap++; } while ((HalfLap<=2)&&(ExitOnImpact==0)); __syncthreads(); zImage[yi+2*bmaxi*xi]=zp; fImage[yi+2*bmaxi*xi]=fp; } } __global__ void Original(float *zImage,float *fImage, uint Size,float Mass,float InternalRadius, float ExternalRadius,float Angle, int Line) { // Integer Impact Parameter Size (half of image) uint bmaxi=(uint)Size; float Trajectory[TRACKPOINTS]; // Perform trajectory for each pixel float m,rs,ri,re,tho; int raie,q; m=Mass; rs=2.e0f*m; ri=InternalRadius; re=ExternalRadius; tho=Angle; q=-2; raie=Line; float bmx,db,b,h; int nh; // Autosize for image bmx=1.25e0f*re; // Angular step of integration h=4.e0f*PI/(float)TRACKPOINTS; // Integer Impact Parameter ID for (int bi=0;bi<bmaxi;bi++) { // impact parameter b=(float)bi/(float)bmaxi*bmx; db=bmx/(2.e0f*(float)bmaxi); float up,vp,pp,us,vs,ps; up=0.; vp=1.; pp=0.; nh=0; rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); // b versus us float bvus=fabs(b/us); float bvus0=bvus; Trajectory[nh]=bvus; do { nh++; pp=ps; up=us; vp=vs; rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); bvus=fabs(b/us); Trajectory[nh]=bvus; } while ((bvus>=rs)&&(bvus<=bvus0)); for (uint i=(uint)nh+1;i<TRACKPOINTS;i++) { Trajectory[i]=0.e0f; } int imx=(int)(16*bi); for (int i=0;i<imx;i++) { float zp=0,fp=0; float phi=2.e0f*PI/(float)imx*(float)i; float phd=atanp(cos(phi)*sin(tho),cos(tho)); uint yi=(uint)((float)bi*sin(phi)+bmaxi); uint xi=(uint)((float)bi*cos(phi)+bmaxi); int HalfLap=0,ExitOnImpact=0,ni; float php,nr,r; do { php=phd+(float)HalfLap*PI; nr=php/h; ni=(int)nr; if (ni<nh) { r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.)+Trajectory[ni]; } else { r=Trajectory[ni]; } if ((r<=re)&&(r>=ri)) { ExitOnImpact=1; impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie); } HalfLap++; } while ((HalfLap<=2)&&(ExitOnImpact==0)); zImage[yi+2*bmaxi*xi]=zp; fImage[yi+2*bmaxi*xi]=fp; } } } """ return(BlobCUDA) # def ImageOutput(sigma,prefix,Colors): # import matplotlib.pyplot as plt # start_time=time.time() # if Colors == 'Red2Yellow': # plt.imsave("%s.png" % prefix, sigma, cmap='afmhot') # else: # plt.imsave("%s.png" % prefix, sigma, cmap='Greys_r') # save_time = time.time()-start_time # print("Save image as %s.png file" % prefix) # print("Save Time : %f" % save_time) def ImageOutput(sigma,prefix,Colors): from PIL import Image Max=sigma.max() Min=sigma.min() # Normalize value as 8bits Integer SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8') image = Image.fromarray(SigmaInt) image.save("%s.jpg" % prefix) def BlackHoleCL(zImage,fImage,InputCL): kernel_params = {} Device=InputCL['Device'] GpuStyle=InputCL['GpuStyle'] VariableType=InputCL['VariableType'] Size=InputCL['Size'] Mass=InputCL['Mass'] InternalRadius=InputCL['InternalRadius'] ExternalRadius=InputCL['ExternalRadius'] Angle=InputCL['Angle'] Method=InputCL['Method'] TrackPoints=InputCL['TrackPoints'] Physics=InputCL['Physics'] NoImage=InputCL['NoImage'] TrackSave=InputCL['TrackSave'] PhysicsList=DictionariesAPI() if InputCL['BlackBody']: # Spectrum is Black Body one Line=0 else: # Spectrum is Monochromatic Line one Line=1 Trajectories=numpy.zeros((int(InputCL['Size']/2),InputCL['TrackPoints']),dtype=numpy.float32) IdLast=numpy.zeros(int(InputCL['Size']/2),dtype=numpy.int32) # Je detecte un peripherique GPU dans la liste des peripheriques Id=0 HasXPU=False for platform in cl.get_platforms(): for device in platform.get_devices(): if Id==Device: PF4XPU=platform.name XPU=device print("CPU/GPU selected: ",device.name.lstrip()) HasXPU=True Id+=1 if HasXPU==False: print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1)) sys.exit() ctx = cl.Context([XPU]) queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) BuildOptions="-DPHYSICS=%i -DSETTRACKPOINTS=%i " % (PhysicsList[Physics],InputCL['TrackPoints']) print('My Platform is ',PF4XPU) if 'Intel' in PF4XPU or 'Experimental' in PF4XPU or 'Clover' in PF4XPU or 'Portable' in PF4XPU : print('No extra options for Intel and Clover!') else: BuildOptions = BuildOptions+" -cl-mad-enable" BlackHoleCL = cl.Program(ctx,BlobOpenCL).build(options = BuildOptions) # Je recupere les flag possibles pour les buffers mf = cl.mem_flags if Method=='TrajectoPixel' or Method=='TrajectoCircle': TrajectoriesCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=Trajectories) IdLastCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=IdLast) zImageCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=zImage) fImageCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=fImage) start_time=time.time() if Method=='EachPixel': CLLaunch=BlackHoleCL.EachPixel(queue,(zImage.shape[0],zImage.shape[1]), None,zImageCL,fImageCL, numpy.float32(Mass), numpy.float32(InternalRadius), numpy.float32(ExternalRadius), numpy.float32(Angle), numpy.int32(Line)) CLLaunch.wait() elif Method=='Original': CLLaunch=BlackHoleCL.Original(queue,(1,), None,zImageCL,fImageCL, numpy.uint32(zImage.shape[0]/2), numpy.float32(Mass), numpy.float32(InternalRadius), numpy.float32(ExternalRadius), numpy.float32(Angle), numpy.int32(Line)) CLLaunch.wait() elif Method=='EachCircle': CLLaunch=BlackHoleCL.EachCircle(queue,(int(zImage.shape[0]/2),), None,zImageCL,fImageCL, numpy.float32(Mass), numpy.float32(InternalRadius), numpy.float32(ExternalRadius), numpy.float32(Angle), numpy.int32(Line)) CLLaunch.wait() elif Method=='TrajectoCircle': CLLaunch=BlackHoleCL.Trajectory(queue,(Trajectories.shape[0],), None,TrajectoriesCL,IdLastCL, numpy.float32(Mass), numpy.float32(InternalRadius), numpy.float32(ExternalRadius), numpy.float32(Angle), numpy.int32(Line)) CLLaunch=BlackHoleCL.Circle(queue,(Trajectories.shape[0], int(zImage.shape[0]*4)),None, TrajectoriesCL,IdLastCL, zImageCL,fImageCL, numpy.float32(Mass), numpy.float32(InternalRadius), numpy.float32(ExternalRadius), numpy.float32(Angle), numpy.int32(Line)) CLLaunch.wait() else: CLLaunch=BlackHoleCL.Trajectory(queue,(Trajectories.shape[0],), None,TrajectoriesCL,IdLastCL, numpy.float32(Mass), numpy.float32(InternalRadius), numpy.float32(ExternalRadius), numpy.float32(Angle), numpy.int32(Line)) CLLaunch=BlackHoleCL.Pixel(queue,(zImage.shape[0],zImage.shape[1]),None, zImageCL,fImageCL,TrajectoriesCL,IdLastCL, numpy.uint32(Trajectories.shape[0]), numpy.float32(Mass), numpy.float32(InternalRadius), numpy.float32(ExternalRadius), numpy.float32(Angle), numpy.int32(Line)) CLLaunch.wait() compute = time.time()-start_time cl.enqueue_copy(queue,zImage,zImageCL).wait() cl.enqueue_copy(queue,fImage,fImageCL).wait() if Method=='TrajectoPixel' or Method=='TrajectoCircle': cl.enqueue_copy(queue,Trajectories,TrajectoriesCL).wait() cl.enqueue_copy(queue,IdLast,IdLastCL).wait() elapsed = time.time()-start_time print("\nCompute Time : %f" % compute) print("Elapsed Time : %f\n" % elapsed) zMaxPosition=numpy.where(zImage[:,:]==zImage.max()) fMaxPosition=numpy.where(fImage[:,:]==fImage.max()) print("Z max @(%f,%f) : %f" % ((1.*zMaxPosition[1][0]/zImage.shape[1]-0.5,1.*zMaxPosition[0][0]/zImage.shape[0]-0.5,zImage.max()))) print("Flux max @(%f,%f) : %f" % ((1.*fMaxPosition[1][0]/fImage.shape[1]-0.5,1.*fMaxPosition[0][0]/fImage.shape[0]-0.5,fImage.max()))) zImageCL.release() fImageCL.release() if Method=='TrajectoPixel' or Method=='TrajectoCircle': if not NoImage: AngleStep=4*numpy.pi/TrackPoints Angles=numpy.arange(0.,4*numpy.pi,AngleStep) Angles.shape=(1,TrackPoints) Hostname=gethostname() Date=time.strftime("%Y%m%d_%H%M%S") ImageInfo="%s_Device%i_%s_%s" % (Method,Device,Hostname,Date) if TrackSave: # numpy.savetxt("TrouNoirTrajectories_%s.csv" % ImageInfo, # numpy.transpose(numpy.concatenate((Angles,Trajectories),axis=0)), # delimiter=' ', fmt='%.2e') numpy.savetxt("TrouNoirTrajectories.csv", numpy.transpose(numpy.concatenate((Angles,Trajectories),axis=0)),delimiter=' ', fmt='%.2e') TrajectoriesCL.release() IdLastCL.release() return(elapsed) def BlackHoleCUDA(zImage,fImage,InputCL): kernel_params = {} Device=InputCL['Device'] GpuStyle=InputCL['GpuStyle'] VariableType=InputCL['VariableType'] Size=InputCL['Size'] Mass=InputCL['Mass'] InternalRadius=InputCL['InternalRadius'] ExternalRadius=InputCL['ExternalRadius'] Angle=InputCL['Angle'] Method=InputCL['Method'] TrackPoints=InputCL['TrackPoints'] Physics=InputCL['Physics'] Threads=InputCL['Threads'] PhysicsList=DictionariesAPI() if InputCL['BlackBody']: # Spectrum is Black Body one Line=0 else: # Spectrum is Monochromatic Line one Line=1 Trajectories=numpy.zeros((int(InputCL['Size']/2),InputCL['TrackPoints']),dtype=numpy.float32) IdLast=numpy.zeros(int(InputCL['Size']/2),dtype=numpy.int32) try: # For PyCUDA import import pycuda.driver as cuda from pycuda.compiler import SourceModule cuda.init() for Id in range(cuda.Device.count()): if Id==Device: XPU=cuda.Device(Id) print("GPU selected %s" % XPU.name()) print except ImportError: print("Platform does not seem to support CUDA") Context=XPU.make_context() try: mod = SourceModule(KernelCodeCuda(),options=['--compiler-options','-DPHYSICS=%i -DSETTRACKPOINTS=%i' % (PhysicsList[Physics],TrackPoints)]) print("Compilation seems to be OK") except: print("Compilation seems to break") EachPixelCU=mod.get_function("EachPixel") OriginalCU=mod.get_function("Original") EachCircleCU=mod.get_function("EachCircle") TrajectoryCU=mod.get_function("Trajectory") PixelCU=mod.get_function("Pixel") CircleCU=mod.get_function("Circle") TrajectoriesCU = cuda.mem_alloc(Trajectories.size*Trajectories.dtype.itemsize) cuda.memcpy_htod(TrajectoriesCU, Trajectories) zImageCU = cuda.mem_alloc(zImage.size*zImage.dtype.itemsize) cuda.memcpy_htod(zImageCU, zImage) fImageCU = cuda.mem_alloc(fImage.size*fImage.dtype.itemsize) cuda.memcpy_htod(zImageCU, fImage) IdLastCU = cuda.mem_alloc(IdLast.size*IdLast.dtype.itemsize) cuda.memcpy_htod(IdLastCU, IdLast) start_time=time.time() if Method=='EachPixel': EachPixelCU(zImageCU,fImageCU, numpy.float32(Mass), numpy.float32(InternalRadius), numpy.float32(ExternalRadius), numpy.float32(Angle), numpy.int32(Line), grid=(int(zImage.shape[0]/Threads), int(zImage.shape[1]/Threads)), block=(Threads,Threads,1)) elif Method=='EachCircle': EachCircleCU(zImageCU,fImageCU, numpy.float32(Mass), numpy.float32(InternalRadius), numpy.float32(ExternalRadius), numpy.float32(Angle), numpy.int32(Line), grid=(int(zImage.shape[0]/Threads/2),1), block=(Threads,1,1)) elif Method=='Original': OriginalCU(zImageCU,fImageCU, numpy.uint32(zImage.shape[0]/2), numpy.float32(Mass), numpy.float32(InternalRadius), numpy.float32(ExternalRadius), numpy.float32(Angle), numpy.int32(Line), grid=(1,1), block=(1,1,1)) elif Method=='TrajectoCircle': TrajectoryCU(TrajectoriesCU,IdLastCU, numpy.float32(Mass), numpy.float32(InternalRadius), numpy.float32(ExternalRadius), numpy.float32(Angle), numpy.int32(Line), grid=(int(Trajectories.shape[0]/Threads),1), block=(Threads,1,1)) CircleCU(TrajectoriesCU,IdLastCU,zImageCU,fImageCU, numpy.float32(Mass), numpy.float32(InternalRadius), numpy.float32(ExternalRadius), numpy.float32(Angle), numpy.int32(Line), grid=(int(Trajectories.shape[0]/Threads), int(zImage.shape[0]*4/Threads)), block=(Threads,Threads,1)) else: # Default method: TrajectoPixel TrajectoryCU(TrajectoriesCU,IdLastCU, numpy.float32(Mass), numpy.float32(InternalRadius), numpy.float32(ExternalRadius), numpy.float32(Angle), numpy.int32(Line), grid=(int(Trajectories.shape[0]/Threads),1), block=(Threads,1,1)) PixelCU(zImageCU,fImageCU,TrajectoriesCU,IdLastCU, numpy.uint32(Trajectories.shape[0]), numpy.float32(Mass), numpy.float32(InternalRadius), numpy.float32(ExternalRadius), numpy.float32(Angle), numpy.int32(Line), grid=(int(zImage.shape[0]/Threads), int(zImage.shape[1]/Threads),1), block=(Threads,Threads,1)) Context.synchronize() compute = time.time()-start_time cuda.memcpy_dtoh(zImage,zImageCU) cuda.memcpy_dtoh(fImage,fImageCU) if Method=='TrajectoPixel' or Method=='TrajectoCircle': cuda.memcpy_dtoh(Trajectories,TrajectoriesCU) elapsed = time.time()-start_time print("\nCompute Time : %f" % compute) print("Elapsed Time : %f\n" % elapsed) zMaxPosition=numpy.where(zImage[:,:]==zImage.max()) fMaxPosition=numpy.where(fImage[:,:]==fImage.max()) print("Z max @(%f,%f) : %f" % ((1.*zMaxPosition[1][0]/zImage.shape[1]-0.5,1.*zMaxPosition[0][0]/zImage.shape[0]-0.5,zImage.max()))) print("Flux max @(%f,%f) : %f" % ((1.*fMaxPosition[1][0]/fImage.shape[1]-0.5,1.*fMaxPosition[0][0]/fImage.shape[0]-0.5,fImage.max()))) Context.pop() Context.detach() if Method=='TrajectoPixel' or Method=='TrajectoCircle': if not NoImage: AngleStep=4*numpy.pi/TrackPoints Angles=numpy.arange(0.,4*numpy.pi,AngleStep) Angles.shape=(1,TrackPoints) Hostname=gethostname() Date=time.strftime("%Y%m%d_%H%M%S") ImageInfo="%s_Device%i_%s_%s" % (Method,Device,Hostname,Date) # numpy.savetxt("TrouNoirTrajectories_%s.csv" % ImageInfo, # numpy.transpose(numpy.concatenate((Angles,Trajectories),axis=0)), # delimiter=' ', fmt='%.2e') numpy.savetxt("TrouNoirTrajectories.csv", numpy.transpose(numpy.concatenate((Angles,Trajectories),axis=0)), delimiter=' ', fmt='%.2e') return(elapsed) if __name__=='__main__': # Default device: first one! Device=0 # Default implementation: OpenCL, most versatile! GpuStyle = 'OpenCL' Mass = 1. # Internal Radius 3 times de Schwarzschild Radius InternalRadius=6.*Mass # ExternalRadius=12. # # Angle with normal to disc 10 degrees Angle = numpy.pi/180.*(90.-10.) # Radiation of disc : BlackBody or Monochromatic BlackBody = False # Size of image Size=1024 # Variable Type VariableType='FP32' # ? q=-2 # Method of resolution Method='TrajectoPixel' # Colors for output image Colors='Greyscale' # Physics Physics='Einstein' # No output as image NoImage = False # Threads in CUDA Threads = 32 # Trackpoints of trajectories TrackPoints=2048 # Tracksave of trajectories TrackSave=False HowToUse='%s -h [Help] -b [BlackBodyEmission] -j [TrackSave] -n [NoImage] -p <Einstein/Newton> -s <SizeInPixels> -m <Mass> -i <DiscInternalRadius> -x <DiscExternalRadius> -a <AngleAboveDisc> -d <DeviceId> -c <Greyscale/Red2Yellow> -g <CUDA/OpenCL> -o <EachPixel/TrajectoCircle/TrajectoPixel/EachCircle/Original> -t <ThreadsInCuda> -v <FP32/FP64> -k <TrackPoints>' try: opts, args = getopt.getopt(sys.argv[1:],"hbnjs:m:i:x:a:d:g:v:o:t:c:p:k:",["tracksave","blackbody","noimage","camera","size=","mass=","internal=","external=","angle=","device=","gpustyle=","variabletype=","method=","threads=","colors=","physics=","trackpoints="]) except getopt.GetoptError: print(HowToUse % sys.argv[0]) sys.exit(2) # List of Devices Devices=[] Alu={} for opt, arg in opts: if opt == '-h': print(HowToUse % sys.argv[0]) print("\nInformations about devices detected under OpenCL API:") # For PyOpenCL import try: import pyopencl as cl Id=0 for platform in cl.get_platforms(): for device in platform.get_devices(): #deviceType=cl.device_type.to_string(device.type) deviceType="xPU" print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip())) Id=Id+1 except: print("Your platform does not seem to support OpenCL") print("\nInformations about devices detected under CUDA API:") # For PyCUDA import try: import pycuda.driver as cuda cuda.init() for Id in range(cuda.Device.count()): device=cuda.Device(Id) print("Device #%i of type GPU : %s" % (Id,device.name())) print except: print("Your platform does not seem to support CUDA") sys.exit() elif opt in ("-d", "--device"): # Devices.append(int(arg)) Device=int(arg) elif opt in ("-g", "--gpustyle"): GpuStyle = arg elif opt in ("-v", "--variabletype"): VariableType = arg elif opt in ("-s", "--size"): Size = int(arg) elif opt in ("-k", "--trackpoints"): TrackPoints = int(arg) elif opt in ("-m", "--mass"): Mass = float(arg) elif opt in ("-i", "--internal"): InternalRadius = float(arg) elif opt in ("-e", "--external"): ExternalRadius = float(arg) elif opt in ("-a", "--angle"): Angle = numpy.pi/180.*(90.-float(arg)) elif opt in ("-b", "--blackbody"): BlackBody = True elif opt in ("-j", "--tracksave"): TrackSave = True elif opt in ("-n", "--noimage"): NoImage = True elif opt in ("-o", "--method"): Method = arg elif opt in ("-t", "--threads"): Threads = int(arg) elif opt in ("-c", "--colors"): Colors = arg elif opt in ("-p", "--physics"): Physics = arg print("Device Identification selected : %s" % Device) print("GpuStyle used : %s" % GpuStyle) print("VariableType : %s" % VariableType) print("Size : %i" % Size) print("Mass : %f" % Mass) print("Internal Radius : %f" % InternalRadius) print("External Radius : %f" % ExternalRadius) print("Angle with normal of (in radians) : %f" % Angle) print("Black Body Disc Emission (monochromatic instead) : %s" % BlackBody) print("Method of resolution : %s" % Method) print("Colors for output images : %s" % Colors) print("Physics used for Trajectories : %s" % Physics) print("Trackpoints of Trajectories : %i" % TrackPoints) print("Tracksave of Trajectories : %i" % TrackSave) if GpuStyle=='CUDA': print("\nSelection of CUDA device") try: # For PyCUDA import import pycuda.driver as cuda cuda.init() for Id in range(cuda.Device.count()): device=cuda.Device(Id) print("Device #%i of type GPU : %s" % (Id,device.name())) if Id in Devices: Alu[Id]='GPU' except ImportError: print("Platform does not seem to support CUDA") if GpuStyle=='OpenCL': print("\nSelection of OpenCL device") try: # For PyOpenCL import import pyopencl as cl Id=0 for platform in cl.get_platforms(): for device in platform.get_devices(): #deviceType=cl.device_type.to_string(device.type) deviceType="xPU" print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip())) if Id in Devices: # Set the Alu as detected Device Type Alu[Id]=deviceType Id=Id+1 except ImportError: print("Platform does not seem to support OpenCL") zImage=numpy.zeros((Size,Size),dtype=numpy.float32) fImage=numpy.zeros((Size,Size),dtype=numpy.float32) InputCL={} InputCL['Device']=Device InputCL['GpuStyle']=GpuStyle InputCL['VariableType']=VariableType InputCL['Size']=Size InputCL['Mass']=Mass InputCL['InternalRadius']=InternalRadius InputCL['ExternalRadius']=ExternalRadius InputCL['Angle']=Angle InputCL['BlackBody']=BlackBody InputCL['Method']=Method InputCL['TrackPoints']=TrackPoints InputCL['Physics']=Physics InputCL['Threads']=Threads InputCL['NoImage']=NoImage InputCL['TrackSave']=TrackSave if GpuStyle=='OpenCL': duration=BlackHoleCL(zImage,fImage,InputCL) else: duration=BlackHoleCUDA(zImage,fImage,InputCL) Hostname=gethostname() Date=time.strftime("%Y%m%d_%H%M%S") ImageInfo="%s_Device%i_%s_%s" % (Method,Device,Hostname,Date) if not NoImage: ImageOutput(zImage,"TrouNoirZ_%s" % ImageInfo,Colors) ImageOutput(fImage,"TrouNoirF_%s" % ImageInfo,Colors)