black-hole-accretion.py 53.47 KiB
#!/usr/bin/env python3
#
# TrouNoir model using PyOpenCL or PyCUDA
#
# CC BY-NC-SA 2019 : <emmanuel.quemener@ens-lyon.fr>
#
# Part of matrix programs from: https://forge.cbp.ens-lyon.fr/svn/bench4gpu/
#
# Thanks to Andreas Klockner for PyOpenCL and PyCUDA:
# http://mathema.tician.de/software/pyopencl
#
# Original code programmed in Fortran 77 in mars 1994
# for Practical Work of Numerical Simulation
# DEA (old Master2) in astrophysics and spatial techniques in Meudon
# by Herve Aussel & Emmanuel Quemener
#
# Conversion in C done by Emmanuel Quemener in august 1997
# GPUfication in OpenCL under Python in july 2019
# GPUfication in CUDA under Python in august 2019
#
# Thanks to :
#
# - Herve Aussel for his part of code of black body spectrum
# - Didier Pelat for his help to perform this work
# - Jean-Pierre Luminet for his article published in 1979
# - Numerical Recipies for Runge Kutta recipies
# - Luc Blanchet for his disponibility about my questions in General Relativity
# - Pierre Lena for his passion about science and vulgarisation
# If crash on OpenCL Intel implementation, add following options and force
# export PYOPENCL_COMPILER_OUTPUT=1
# export CL_CONFIG_USE_VECTORIZER=True
# export CL_CONFIG_CPU_VECTORIZER_MODE=16
import pyopencl as cl
import numpy
import time
import sys
import getopt
from socket import gethostname
def DictionariesAPI():
PhysicsList = {"Einstein": 0, "Newton": 1}
return PhysicsList
#
# Blank space below to simplify debugging on OpenCL code
#
BlobOpenCL = """
#define PI (float)3.14159265359e0f
#define nbr 256
#define EINSTEIN 0
#define NEWTON 1
#ifdef SETTRACKPOINTS
#define TRACKPOINTS SETTRACKPOINTS
#else
#define TRACKPOINTS 2048
#endif
float atanp(float x,float y)
{
float angle;
angle=atan2(y,x);
if (angle<0.e0f)
{
angle+=(float)2.e0f*PI;
}
return angle;
}
float f(float v)
{
return v;
}
#if PHYSICS == NEWTON
float g(float u,float m,float b)
{
return (-u);
}
#else
float g(float u,float m,float b)
{
return (3.e0f*m/b*pow(u,2)-u);
}
#endif
void calcul(float *us,float *vs,float up,float vp,
float h,float m,float b)
{
float c0,c1,c2,c3,d0,d1,d2,d3;
c0=h*f(vp);
c1=h*f(vp+c0/2.e0f);
c2=h*f(vp+c1/2.e0f);
c3=h*f(vp+c2);
d0=h*g(up,m,b);
d1=h*g(up+d0/2.e0f,m,b);
d2=h*g(up+d1/2.e0f,m,b);
d3=h*g(up+d2,m,b);
*us=up+(c0+2.e0f*c1+2.e0f*c2+c3)/6.e0f;
*vs=vp+(d0+2.e0f*d1+2.e0f*d2+d3)/6.e0f;
}
void rungekutta(float *ps,float *us,float *vs,
float pp,float up,float vp,
float h,float m,float b)
{
calcul(us,vs,up,vp,h,m,b);
*ps=pp+h;
}
float decalage_spectral(float r,float b,float phi,
float tho,float m)
{
return (sqrt(1-3*m/r)/(1+sqrt(m/pow(r,3))*b*sin(tho)*sin(phi)));
}
float spectre(float rf,int q,float b,float db,
float h,float r,float m,float bss)
{
float flx;
// flx=exp(q*log(r/m))*pow(rf,4)*b*db*h;
flx=exp(q*log(r/m)+4.e0f*log(rf))*b*db*h;
return(flx);
}
float spectre_cn(float rf32,float b32,float db32,
float h32,float r32,float m32,float bss32)
{
#define MYFLOAT float
MYFLOAT rf=(MYFLOAT)(rf32);
MYFLOAT b=(MYFLOAT)(b32);
MYFLOAT db=(MYFLOAT)(db32);
MYFLOAT h=(MYFLOAT)(h32);
MYFLOAT r=(MYFLOAT)(r32);
MYFLOAT m=(MYFLOAT)(m32);
MYFLOAT bss=(MYFLOAT)(bss32);
MYFLOAT flx;
MYFLOAT nu_rec,nu_em,qu,temp_em,flux_int;
int fi,posfreq;
#define planck 6.62e-34f
#define k 1.38e-23f
#define c2 9.e16f
#define temp 3.e7f
#define m_point 1.e0f
#define lplanck (log(6.62e0f)-34.e0f*log(10.e0f))
#define lk (log(1.38e0f)-23.e0f*log(10.e0f))
#define lc2 (log(9.e0f)+16.e0f*log(10.e0f))
MYFLOAT v=1.e0f-3.e0f/r;
qu=1.e0f/sqrt((1.e0f-3.e0f/r)*r)*(sqrt(r)-sqrt(6.e0f)+sqrt(3.e0f)/2.e0f*log((sqrt(r)+sqrt(3.e0f))/(sqrt(r)-sqrt(3.e0f))* 0.17157287525380988e0f )); // # noqa: E501
temp_em=temp*sqrt(m)*exp(0.25e0f*log(m_point)-0.75e0f*log(r)-0.125e0f*log(v)+0.25e0f*log(fabs(qu)));
flux_int=0.e0f;
flx=0.e0f;
for (fi=0;fi<nbr;fi++)
{
nu_em=bss*(MYFLOAT)fi/(MYFLOAT)nbr;
nu_rec=nu_em*rf;
posfreq=(int)(nu_rec*(MYFLOAT)nbr/bss);
if ((posfreq>0)&&(posfreq<nbr))
{
// Initial version
// flux_int=2.*planck/c2*pow(nu_em,3)/(exp(planck*nu_em/(k*temp_em))-1.);
// Version with log used
//flux_int=2.*exp(lplanck-lc2+3.*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.);
// flux_int*=pow(rf,3)*b*db*h;
//flux_int*=exp(3.e0f*log(rf))*b*db*h;
flux_int=2.e0f*exp(lplanck-lc2+3.e0f*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.e0f)*exp(3.e0f*log(rf))*b*db*h;
flx+=flux_int;
}
}
return((float)(flx));
}
void impact(float phi,float r,float b,float tho,float m,
float *zp,float *fp,
int q,float db,
float h,int raie)
{
float flx,rf,bss;
rf=decalage_spectral(r,b,phi,tho,m);
if (raie==0)
{
bss=1.e19f;
flx=spectre_cn(rf,b,db,h,r,m,bss);
}
else
{
bss=2.e0f;
flx=spectre(rf,q,b,db,h,r,m,bss);
}
*zp=1.e0f/rf;
*fp=flx;
}
__kernel void EachPixel(__global float *zImage,__global float *fImage,
float Mass,float InternalRadius,
float ExternalRadius,float Angle,
int Line)
{
uint xi=(uint)get_global_id(0);
uint yi=(uint)get_global_id(1);
uint sizex=(uint)get_global_size(0);
uint sizey=(uint)get_global_size(1);
// Perform trajectory for each pixel, exit on hit
float m,rs,ri,re,tho;
int q,raie;
m=Mass;
rs=2.e0f*m;
ri=InternalRadius;
re=ExternalRadius;
tho=Angle;
q=-2;
raie=Line;
float bmx,db,b,h;
float rp0,rps;
float phi,phd;
uint nh=0;
float zp=0.e0f,fp=0.e0f;
// Autosize for image
bmx=1.25e0f*re;
h=4.e0f*PI/(float)TRACKPOINTS;
// set origin as center of image
float x=(float)xi-(float)(sizex/2)+(float)5.e-1f;
float y=(float)yi-(float)(sizey/2)+(float)5.e-1f;
// angle extracted from cylindric symmetry
phi=atanp(x,y);
phd=atanp(cos(phi)*sin(tho),cos(tho));
float up,vp,pp,us,vs,ps;
// impact parameter
b=sqrt(x*x+y*y)*(float)2.e0f/(float)sizex*bmx;
// step of impact parameter;
db=bmx/(float)(sizex);
up=0.e0f;
vp=1.e0f;
pp=0.e0f;
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
rps=fabs(b/us);
rp0=rps;
int ExitOnImpact=0;
do
{
nh++;
pp=ps;
up=us;
vp=vs;
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
rps=fabs(b/us);
ExitOnImpact = ((fmod(pp,PI)<fmod(phd,PI))&&(fmod(ps,PI)>fmod(phd,PI)))&&(rps>=ri)&&(rps<=re)?1:0;
} while ((rps>=rs)&&(rps<=rp0)&&(ExitOnImpact==0)&&(nh<TRACKPOINTS));
if (ExitOnImpact==1) {
impact(phi,rps,b,tho,m,&zp,&fp,q,db,h,raie);
}
else
{
zp=0.e0f;
fp=0.e0f;
}
barrier(CLK_GLOBAL_MEM_FENCE);
zImage[yi+sizex*xi]=(float)zp;
fImage[yi+sizex*xi]=(float)fp;
}
__kernel void Pixel(__global float *zImage,__global float *fImage,
__global float *Trajectories,__global int *IdLast,
uint ImpactParameter,
float Mass,float InternalRadius,
float ExternalRadius,float Angle,
int Line)
{
uint xi=(uint)get_global_id(0);
uint yi=(uint)get_global_id(1);
uint sizex=(uint)get_global_size(0);
uint sizey=(uint)get_global_size(1);
// Perform trajectory for each pixel
float m,ri,re,tho;
int q,raie;
m=Mass;
ri=InternalRadius;
re=ExternalRadius;
tho=Angle;
q=-2;
raie=Line;
float bmx,db,b,h;
float phi,phd,php,nr,r;
float zp=0.e0f,fp=0.e0f;
// Autosize for image, 25% greater than external radius
bmx=1.25e0f*re;
// Angular step of integration
h=4.e0f*PI/(float)TRACKPOINTS;
// Step of Impact Parameter
db=bmx/(2.e0f*(float)ImpactParameter);
// set origin as center of image
float x=(float)xi-(float)(sizex/2)+(float)5.e-1f;
float y=(float)yi-(float)(sizey/2)+(float)5.e-1f;
// angle extracted from cylindric symmetry
phi=atanp(x,y);
phd=atanp(cos(phi)*sin(tho),cos(tho));
// Real Impact Parameter
b=sqrt(x*x+y*y)*bmx/(float)ImpactParameter;
// Integer Impact Parameter
uint bi=(uint)sqrt(x*x+y*y);
int HalfLap=0,ExitOnImpact=0,ni;
if (bi<ImpactParameter)
{
do
{
php=phd+(float)HalfLap*PI;
nr=php/h;
ni=(int)nr;
if (ni<IdLast[bi])
{
r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni];
}
else
{
r=Trajectories[bi*TRACKPOINTS+ni];
}
if ((r<=re)&&(r>=ri))
{
ExitOnImpact=1;
impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
}
HalfLap++;
} while ((HalfLap<=2)&&(ExitOnImpact==0));
}
barrier(CLK_GLOBAL_MEM_FENCE);
zImage[yi+sizex*xi]=zp;
fImage[yi+sizex*xi]=fp;
}
__kernel void Circle(__global float *Trajectories,__global int *IdLast,
__global float *zImage,__global float *fImage,
float Mass,float InternalRadius,
float ExternalRadius,float Angle,
int Line)
{
// Integer Impact Parameter ID
int bi=get_global_id(0);
// Integer points on circle
int i=get_global_id(1);
// Integer Impact Parameter Size (half of image)
int bmaxi=get_global_size(0);
// Integer Points on circle
int imx=get_global_size(1);
// Perform trajectory for each pixel
float m,ri,re,tho;
int q,raie;
m=Mass;
ri=InternalRadius;
re=ExternalRadius;
tho=Angle;
raie=Line;
float bmx,db,b,h;
float phi,phd;
float zp=0.e0f,fp=0.e0f;
// Autosize for image
bmx=1.25e0f*re;
// Angular step of integration
h=4.e0f*PI/(float)TRACKPOINTS;
// impact parameter
b=(float)bi/(float)bmaxi*bmx;
db=bmx/(2.e0f*(float)bmaxi);
phi=2.e0f*PI/(float)imx*(float)i;
phd=atanp(cos(phi)*sin(tho),cos(tho));
int yi=(int)((float)bi*sin(phi))+bmaxi;
int xi=(int)((float)bi*cos(phi))+bmaxi;
int HalfLap=0,ExitOnImpact=0,ni;
float php,nr,r;
do
{
php=phd+(float)HalfLap*PI;
nr=php/h;
ni=(int)nr;
if (ni<IdLast[bi])
{
r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni];
}
else
{
r=Trajectories[bi*TRACKPOINTS+ni];
}
if ((r<=re)&&(r>=ri))
{
ExitOnImpact=1;
impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
}
HalfLap++;
} while ((HalfLap<=2)&&(ExitOnImpact==0));
zImage[yi+2*bmaxi*xi]=zp;
fImage[yi+2*bmaxi*xi]=fp;
barrier(CLK_GLOBAL_MEM_FENCE);
}
__kernel void Trajectory(__global float *Trajectories,__global int *IdLast,
float Mass,float InternalRadius,
float ExternalRadius,float Angle,
int Line)
{
// Integer Impact Parameter ID
int bi=get_global_id(0);
// Integer Impact Parameter Size (half of image)
int bmaxi=get_global_size(0);
// Perform trajectory for each pixel
float m,rs,re;
m=Mass;
rs=2.e0f*m;
re=ExternalRadius;
float bmx,b,h;
int nh;
// Autosize for image
bmx=1.25e0f*re;
// Angular step of integration
h=4.e0f*PI/(float)TRACKPOINTS;
// impact parameter
b=(float)bi/(float)bmaxi*bmx;
float up,vp,pp,us,vs,ps;
up=0.e0f;
vp=1.e0f;
pp=0.e0f;
nh=0;
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
// b versus us
float bvus=fabs(b/us);
float bvus0=bvus;
Trajectories[bi*TRACKPOINTS+nh]=bvus;
do
{
nh++;
pp=ps;
up=us;
vp=vs;
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
bvus=fabs(b/us);
Trajectories[bi*TRACKPOINTS+nh]=bvus;
} while ((bvus>=rs)&&(bvus<=bvus0));
IdLast[bi]=nh;
barrier(CLK_GLOBAL_MEM_FENCE);
}
__kernel void EachCircle(__global float *zImage,__global float *fImage,
float Mass,float InternalRadius,
float ExternalRadius,float Angle,
int Line)
{
// Integer Impact Parameter ID
uint bi=(uint)get_global_id(0);
// Integer Impact Parameter Size (half of image)
uint bmaxi=(uint)get_global_size(0);
private float Trajectory[TRACKPOINTS];
float m,rs,ri,re,tho;
int raie,q;
m=Mass;
rs=2.e0f*m;
ri=InternalRadius;
re=ExternalRadius;
tho=Angle;
q=-2;
raie=Line;
float bmx,db,b,h;
uint nh;
// Autosize for image
bmx=1.25e0f*re;
// Angular step of integration
h=4.e0f*PI/(float)TRACKPOINTS;
// impact parameter
b=(float)bi/(float)bmaxi*bmx;
db=bmx/(2.e0f*(float)bmaxi);
float up,vp,pp,us,vs,ps;
up=0.e0f;
vp=1.e0f;
pp=0.e0f;
nh=0;
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
// b versus us
float bvus=fabs(b/us);
float bvus0=bvus;
Trajectory[nh]=bvus;
do
{
nh++;
pp=ps;
up=us;
vp=vs;
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
bvus=(float)fabs(b/us);
Trajectory[nh]=bvus;
} while ((bvus>=rs)&&(bvus<=bvus0));
for (uint i=(uint)nh+1;i<TRACKPOINTS;i++) {
Trajectory[i]=0.e0f;
}
uint imx=(uint)(16*bi);
for (uint i=0;i<imx;i++)
{
float zp=0.e0f,fp=0.e0f;
float phi=2.e0f*PI/(float)imx*(float)i;
float phd=atanp(cos(phi)*sin(tho),cos(tho));
uint yi=(uint)((float)bi*sin(phi)+bmaxi);
uint xi=(uint)((float)bi*cos(phi)+bmaxi);
uint HalfLap=0,ExitOnImpact=0,ni;
float php,nr,r;
do
{
php=phd+(float)HalfLap*PI;
nr=php/h;
ni=(int)nr;
if (ni<nh)
{
r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.e0f)+Trajectory[ni];
}
else
{
r=Trajectory[ni];
}
if ((r<=re)&&(r>=ri))
{
ExitOnImpact=1;
impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
}
HalfLap++;
} while ((HalfLap<=2)&&(ExitOnImpact==0));
zImage[yi+2*bmaxi*xi]=zp;
fImage[yi+2*bmaxi*xi]=fp;
}
barrier(CLK_GLOBAL_MEM_FENCE);
}
__kernel void Original(__global float *zImage,__global float *fImage,
uint Size,float Mass,float InternalRadius,
float ExternalRadius,float Angle,
int Line)
{
// Integer Impact Parameter Size (half of image)
uint bmaxi=(uint)Size;
float Trajectory[TRACKPOINTS];
// Perform trajectory for each pixel
float m,rs,ri,re,tho;
int raie,q;
m=Mass;
rs=2.e0f*m;
ri=InternalRadius;
re=ExternalRadius;
tho=Angle;
q=-2;
raie=Line;
float bmx,db,b,h;
uint nh;
// Autosize for image
bmx=1.25e0f*re;
// Angular step of integration
h=4.e0f*PI/(float)TRACKPOINTS;
// Integer Impact Parameter ID
for (int bi=0;bi<bmaxi;bi++)
{
// impact parameter
b=(float)bi/(float)bmaxi*bmx;
db=bmx/(2.e0f*(float)bmaxi);
float up,vp,pp,us,vs,ps;
up=0.e0f;
vp=1.e0f;
pp=0.e0f;
nh=0;
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
// b versus us
float bvus=fabs(b/us);
float bvus0=bvus;
Trajectory[nh]=bvus;
do
{
nh++;
pp=ps;
up=us;
vp=vs;
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
bvus=fabs(b/us);
Trajectory[nh]=bvus;
} while ((bvus>=rs)&&(bvus<=bvus0));
for (uint i=(uint)nh+1;i<TRACKPOINTS;i++) {
Trajectory[i]=0.e0f;
}
int imx=(int)(16*bi);
for (int i=0;i<imx;i++)
{
float zp=0.e0f,fp=0.e0f;
float phi=2.e0f*PI/(float)imx*(float)i;
float phd=atanp(cos(phi)*sin(tho),cos(tho));
uint yi=(uint)((float)bi*sin(phi)+bmaxi);
uint xi=(uint)((float)bi*cos(phi)+bmaxi);
uint HalfLap=0,ExitOnImpact=0,ni;
float php,nr,r;
do
{
php=phd+(float)HalfLap*PI;
nr=php/h;
ni=(int)nr;
if (ni<nh)
{
r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.e0f)+Trajectory[ni];
}
else
{
r=Trajectory[ni];
}
if ((r<=re)&&(r>=ri))
{
ExitOnImpact=1;
impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
}
HalfLap++;
} while ((HalfLap<=2)&&(ExitOnImpact==0));
zImage[yi+2*bmaxi*xi]=zp;
fImage[yi+2*bmaxi*xi]=fp;
}
}
barrier(CLK_GLOBAL_MEM_FENCE);
}
"""
def KernelCodeCuda():
BlobCUDA = """
#define PI (float)3.14159265359
#define nbr 256
#define EINSTEIN 0
#define NEWTON 1
#ifdef SETTRACKPOINTS
#define TRACKPOINTS SETTRACKPOINTS
#else
#define TRACKPOINTS
#endif
__device__ float nothing(float x)
{
return(x);
}
__device__ float atanp(float x,float y)
{
float angle;
angle=atan2(y,x);
if (angle<0.e0f)
{
angle+=(float)2.e0f*PI;
}
return(angle);
}
__device__ float f(float v)
{
return(v);
}
#if PHYSICS == NEWTON
__device__ float g(float u,float m,float b)
{
return (-u);
}
#else
__device__ float g(float u,float m,float b)
{
return (3.e0f*m/b*pow(u,2)-u);
}
#endif
__device__ void calcul(float *us,float *vs,float up,float vp,
float h,float m,float b)
{
float c0,c1,c2,c3,d0,d1,d2,d3;
c0=h*f(vp);
c1=h*f(vp+c0/2.);
c2=h*f(vp+c1/2.);
c3=h*f(vp+c2);
d0=h*g(up,m,b);
d1=h*g(up+d0/2.,m,b);
d2=h*g(up+d1/2.,m,b);
d3=h*g(up+d2,m,b);
*us=up+(c0+2.*c1+2.*c2+c3)/6.;
*vs=vp+(d0+2.*d1+2.*d2+d3)/6.;
}
__device__ void rungekutta(float *ps,float *us,float *vs,
float pp,float up,float vp,
float h,float m,float b)
{
calcul(us,vs,up,vp,h,m,b);
*ps=pp+h;
}
__device__ float decalage_spectral(float r,float b,float phi,
float tho,float m)
{
return (sqrt(1-3*m/r)/(1+sqrt(m/pow(r,3))*b*sin(tho)*sin(phi)));
}
__device__ float spectre(float rf,int q,float b,float db,
float h,float r,float m,float bss)
{
float flx;
// flx=exp(q*log(r/m))*pow(rf,4)*b*db*h;
flx=exp(q*log(r/m)+4.*log(rf))*b*db*h;
return(flx);
}
__device__ float spectre_cn(float rf32,float b32,float db32,
float h32,float r32,float m32,float bss32)
{
#define MYFLOAT float
MYFLOAT rf=(MYFLOAT)(rf32);
MYFLOAT b=(MYFLOAT)(b32);
MYFLOAT db=(MYFLOAT)(db32);
MYFLOAT h=(MYFLOAT)(h32);
MYFLOAT r=(MYFLOAT)(r32);
MYFLOAT m=(MYFLOAT)(m32);
MYFLOAT bss=(MYFLOAT)(bss32);
MYFLOAT flx;
MYFLOAT nu_rec,nu_em,qu,temp_em,flux_int;
int fi,posfreq;
#define planck 6.62e-34
#define k 1.38e-23
#define c2 9.e16
#define temp 3.e7
#define m_point 1.
#define lplanck (log(6.62)-34.*log(10.))
#define lk (log(1.38)-23.*log(10.))
#define lc2 (log(9.)+16.*log(10.))
MYFLOAT v=1.-3./r;
qu=1./sqrt((1.-3./r)*r)*(sqrt(r)-sqrt(6.)+sqrt(3.)/2.*log((sqrt(r)+sqrt(3.))/(sqrt(r)-sqrt(3.))* 0.17157287525380988 )); // # noqa: #051
temp_em=temp*sqrt(m)*exp(0.25*log(m_point)-0.75*log(r)-0.125*log(v)+0.25*log(fabs(qu)));
flux_int=0.;
flx=0.;
for (fi=0;fi<nbr;fi++)
{
nu_em=bss*(MYFLOAT)fi/(MYFLOAT)nbr;
nu_rec=nu_em*rf;
posfreq=(int)(nu_rec*(MYFLOAT)nbr/bss);
if ((posfreq>0)&&(posfreq<nbr))
{
// Initial version
// flux_int=2.*planck/c2*pow(nu_em,3)/(exp(planck*nu_em/(k*temp_em))-1.);
// Version with log used
//flux_int=2.*exp(lplanck-lc2+3.*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.);
// flux_int*=pow(rf,3)*b*db*h;
//flux_int*=exp(3.*log(rf))*b*db*h;
flux_int=2.*exp(lplanck-lc2+3.*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.)*exp(3.*log(rf))*b*db*h;
flx+=flux_int;
}
}
return((float)(flx));
}
__device__ void impact(float phi,float r,float b,float tho,float m,
float *zp,float *fp,
int q,float db,
float h,int raie)
{
float flx,rf,bss;
rf=decalage_spectral(r,b,phi,tho,m);
if (raie==0)
{
bss=1.e19;
flx=spectre_cn(rf,b,db,h,r,m,bss);
}
else
{
bss=2.;
flx=spectre(rf,q,b,db,h,r,m,bss);
}
*zp=1./rf;
*fp=flx;
}
__global__ void EachPixel(float *zImage,float *fImage,
float Mass,float InternalRadius,
float ExternalRadius,float Angle,
int Line)
{
uint xi=(uint)(blockIdx.x*blockDim.x+threadIdx.x);
uint yi=(uint)(blockIdx.y*blockDim.y+threadIdx.y);
uint sizex=(uint)gridDim.x*blockDim.x;
uint sizey=(uint)gridDim.y*blockDim.y;
// Perform trajectory for each pixel, exit on hit
float m,rs,ri,re,tho;
int q,raie;
m=Mass;
rs=2.*m;
ri=InternalRadius;
re=ExternalRadius;
tho=Angle;
q=-2;
raie=Line;
float bmx,db,b,h;
float rp0,rpp,rps;
float phi,phd;
int nh;
float zp,fp;
// Autosize for image
bmx=1.25*re;
b=0.;
h=4.e0f*PI/(float)TRACKPOINTS;
// set origin as center of image
float x=(float)xi-(float)(sizex/2)+(float)5e-1f;
float y=(float)yi-(float)(sizey/2)+(float)5e-1f;
// angle extracted from cylindric symmetry
phi=atanp(x,y);
phd=atanp(cos(phi)*sin(tho),cos(tho));
float up,vp,pp,us,vs,ps;
// impact parameter
b=sqrt(x*x+y*y)*(float)2.e0f/(float)sizex*bmx;
// step of impact parameter;
// db=bmx/(float)(sizex/2);
db=bmx/(float)(sizex);
up=0.;
vp=1.;
pp=0.;
nh=0;
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
rps=fabs(b/us);
rp0=rps;
int ExitOnImpact=0;
do
{
nh++;
pp=ps;
up=us;
vp=vs;
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
rpp=rps;
rps=fabs(b/us);
ExitOnImpact = ((fmod(pp,PI)<fmod(phd,PI))&&(fmod(ps,PI)>fmod(phd,PI)))&&(rps>ri)&&(rps<re)?1:0;
} while ((rps>=rs)&&(rps<=rp0)&&(ExitOnImpact==0));
if (ExitOnImpact==1) {
impact(phi,rpp,b,tho,m,&zp,&fp,q,db,h,raie);
}
else
{
zp=0.e0f;
fp=0.e0f;
}
__syncthreads();
zImage[yi+sizex*xi]=(float)zp;
fImage[yi+sizex*xi]=(float)fp;
}
__global__ void Pixel(float *zImage,float *fImage,
float *Trajectories,int *IdLast,
uint ImpactParameter,
float Mass,float InternalRadius,
float ExternalRadius,float Angle,
int Line)
{
uint xi=(uint)(blockIdx.x*blockDim.x+threadIdx.x);
uint yi=(uint)(blockIdx.y*blockDim.y+threadIdx.y);
uint sizex=(uint)gridDim.x*blockDim.x;
uint sizey=(uint)gridDim.y*blockDim.y;
// Perform trajectory for each pixel
float m,ri,re,tho;
int q,raie;
m=Mass;
ri=InternalRadius;
re=ExternalRadius;
tho=Angle;
q=-2;
raie=Line;
float bmx,db,b,h;
float phi,phd,php,nr,r;
float zp=0,fp=0;
// Autosize for image, 25% greater than external radius
bmx=1.25e0f*re;
// Angular step of integration
h=4.e0f*PI/(float)TRACKPOINTS;
// Step of Impact Parameter
db=bmx/(2.e0f*(float)ImpactParameter);
// set origin as center of image
float x=(float)xi-(float)(sizex/2)+(float)5e-1f;
float y=(float)yi-(float)(sizey/2)+(float)5e-1f;
// angle extracted from cylindric symmetry
phi=atanp(x,y);
phd=atanp(cos(phi)*sin(tho),cos(tho));
// Real Impact Parameter
b=sqrt(x*x+y*y)*bmx/(float)ImpactParameter;
// Integer Impact Parameter
uint bi=(uint)sqrt(x*x+y*y);
int HalfLap=0,ExitOnImpact=0,ni;
if (bi<ImpactParameter)
{
do
{
php=phd+(float)HalfLap*PI;
nr=php/h;
ni=(int)nr;
if (ni<IdLast[bi])
{
r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni];
}
else
{
r=Trajectories[bi*TRACKPOINTS+ni];
}
if ((r<=re)&&(r>=ri))
{
ExitOnImpact=1;
impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
}
HalfLap++;
} while ((HalfLap<=2)&&(ExitOnImpact==0));
}
zImage[yi+sizex*xi]=zp;
fImage[yi+sizex*xi]=fp;
}
__global__ void Circle(float *Trajectories,int *IdLast,
float *zImage,float *fImage,
float Mass,float InternalRadius,
float ExternalRadius,float Angle,
int Line)
{
// Integer Impact Parameter ID
int bi=blockIdx.x*blockDim.x+threadIdx.x;
// Integer points on circle
int i=blockIdx.y*blockDim.y+threadIdx.y;
// Integer Impact Parameter Size (half of image)
int bmaxi=gridDim.x*blockDim.x;
// Integer Points on circle
int imx=gridDim.y*blockDim.y;
// Perform trajectory for each pixel
float m,ri,re,tho;
int q,raie;
m=Mass;
ri=InternalRadius;
re=ExternalRadius;
tho=Angle;
raie=Line;
float bmx,db,b,h;
float phi,phd;
float zp=0,fp=0;
// Autosize for image
bmx=1.25e0f*re;
// Angular step of integration
h=4.e0f*PI/(float)TRACKPOINTS;
// impact parameter
b=(float)bi/(float)bmaxi*bmx;
db=bmx/(2.e0f*(float)bmaxi);
phi=2.e0f*PI/(float)imx*(float)i;
phd=atanp(cos(phi)*sin(tho),cos(tho));
int yi=(int)((float)bi*sin(phi))+bmaxi;
int xi=(int)((float)bi*cos(phi))+bmaxi;
int HalfLap=0,ExitOnImpact=0,ni;
float php,nr,r;
do
{
php=phd+(float)HalfLap*PI;
nr=php/h;
ni=(int)nr;
if (ni<IdLast[bi])
{
r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni];
}
else
{
r=Trajectories[bi*TRACKPOINTS+ni];
}
if ((r<=re)&&(r>=ri))
{
ExitOnImpact=1;
impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
}
HalfLap++;
} while ((HalfLap<=2)&&(ExitOnImpact==0));
zImage[yi+2*bmaxi*xi]=zp;
fImage[yi+2*bmaxi*xi]=fp;
}
__global__ void Trajectory(float *Trajectories,int *IdLast,
float Mass,float InternalRadius,
float ExternalRadius,float Angle,
int Line)
{
// Integer Impact Parameter ID
int bi=blockIdx.x*blockDim.x+threadIdx.x;
// Integer Impact Parameter Size (half of image)
int bmaxi=gridDim.x*blockDim.x;
// Perform trajectory for each pixel
float m,rs,re;
m=Mass;
rs=2.e0f*m;
re=ExternalRadius;
float bmx,b,h;
int nh;
// Autosize for image
bmx=1.25e0f*re;
// Angular step of integration
h=4.e0f*PI/(float)TRACKPOINTS;
// impact parameter
b=(float)bi/(float)bmaxi*bmx;
float up,vp,pp,us,vs,ps;
up=0.e0f;
vp=1.e0f;
pp=0.e0f;
nh=0;
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
// b versus us
float bvus=fabs(b/us);
float bvus0=bvus;
Trajectories[bi*TRACKPOINTS+nh]=bvus;
do
{
nh++;
pp=ps;
up=us;
vp=vs;
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
bvus=fabs(b/us);
Trajectories[bi*TRACKPOINTS+nh]=bvus;
} while ((bvus>=rs)&&(bvus<=bvus0));
IdLast[bi]=nh;
}
__global__ void EachCircle(float *zImage,float *fImage,
float Mass,float InternalRadius,
float ExternalRadius,float Angle,
int Line)
{
// Integer Impact Parameter ID
int bi=blockIdx.x*blockDim.x+threadIdx.x;
// Integer Impact Parameter Size (half of image)
int bmaxi=gridDim.x*blockDim.x;
float Trajectory[2048];
// Perform trajectory for each pixel
float m,rs,ri,re,tho;
int raie,q;
m=Mass;
rs=2.*m;
ri=InternalRadius;
re=ExternalRadius;
tho=Angle;
q=-2;
raie=Line;
float bmx,db,b,h;
int nh;
// Autosize for image
bmx=1.25e0f*re;
// Angular step of integration
h=4.e0f*PI/(float)TRACKPOINTS;
// impact parameter
b=(float)bi/(float)bmaxi*bmx;
db=bmx/(2.e0f*(float)bmaxi);
float up,vp,pp,us,vs,ps;
up=0.;
vp=1.;
pp=0.;
nh=0;
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
// b versus us
float bvus=fabs(b/us);
float bvus0=bvus;
Trajectory[nh]=bvus;
do
{
nh++;
pp=ps;
up=us;
vp=vs;
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
bvus=fabs(b/us);
Trajectory[nh]=bvus;
} while ((bvus>=rs)&&(bvus<=bvus0));
int imx=(int)(16*bi);
for (int i=0;i<imx;i++)
{
float zp=0,fp=0;
float phi=2.*PI/(float)imx*(float)i;
float phd=atanp(cos(phi)*sin(tho),cos(tho));
uint yi=(uint)((float)bi*sin(phi)+bmaxi);
uint xi=(uint)((float)bi*cos(phi)+bmaxi);
int HalfLap=0,ExitOnImpact=0,ni;
float php,nr,r;
do
{
php=phd+(float)HalfLap*PI;
nr=php/h;
ni=(int)nr;
if (ni<nh)
{
r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.)+Trajectory[ni];
}
else
{
r=Trajectory[ni];
}
if ((r<=re)&&(r>=ri))
{
ExitOnImpact=1;
impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
}
HalfLap++;
} while ((HalfLap<=2)&&(ExitOnImpact==0));
__syncthreads();
zImage[yi+2*bmaxi*xi]=zp;
fImage[yi+2*bmaxi*xi]=fp;
}
}
__global__ void Original(float *zImage,float *fImage,
uint Size,float Mass,float InternalRadius,
float ExternalRadius,float Angle,
int Line)
{
// Integer Impact Parameter Size (half of image)
uint bmaxi=(uint)Size;
float Trajectory[TRACKPOINTS];
// Perform trajectory for each pixel
float m,rs,ri,re,tho;
int raie,q;
m=Mass;
rs=2.e0f*m;
ri=InternalRadius;
re=ExternalRadius;
tho=Angle;
q=-2;
raie=Line;
float bmx,db,b,h;
int nh;
// Autosize for image
bmx=1.25e0f*re;
// Angular step of integration
h=4.e0f*PI/(float)TRACKPOINTS;
// Integer Impact Parameter ID
for (int bi=0;bi<bmaxi;bi++)
{
// impact parameter
b=(float)bi/(float)bmaxi*bmx;
db=bmx/(2.e0f*(float)bmaxi);
float up,vp,pp,us,vs,ps;
up=0.;
vp=1.;
pp=0.;
nh=0;
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
// b versus us
float bvus=fabs(b/us);
float bvus0=bvus;
Trajectory[nh]=bvus;
do
{
nh++;
pp=ps;
up=us;
vp=vs;
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
bvus=fabs(b/us);
Trajectory[nh]=bvus;
} while ((bvus>=rs)&&(bvus<=bvus0));
for (uint i=(uint)nh+1;i<TRACKPOINTS;i++) {
Trajectory[i]=0.e0f;
}
int imx=(int)(16*bi);
for (int i=0;i<imx;i++)
{
float zp=0,fp=0;
float phi=2.e0f*PI/(float)imx*(float)i;
float phd=atanp(cos(phi)*sin(tho),cos(tho));
uint yi=(uint)((float)bi*sin(phi)+bmaxi);
uint xi=(uint)((float)bi*cos(phi)+bmaxi);
int HalfLap=0,ExitOnImpact=0,ni;
float php,nr,r;
do
{
php=phd+(float)HalfLap*PI;
nr=php/h;
ni=(int)nr;
if (ni<nh)
{
r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.)+Trajectory[ni];
}
else
{
r=Trajectory[ni];
}
if ((r<=re)&&(r>=ri))
{
ExitOnImpact=1;
impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
}
HalfLap++;
} while ((HalfLap<=2)&&(ExitOnImpact==0));
zImage[yi+2*bmaxi*xi]=zp;
fImage[yi+2*bmaxi*xi]=fp;
}
}
}
"""
return BlobCUDA
# def ImageOutput(sigma,prefix,Colors):
# import matplotlib.pyplot as plt
# start_time=time.time()
# if Colors == 'Red2Yellow':
# plt.imsave("%s.png" % prefix, sigma, cmap='afmhot')
# else:
# plt.imsave("%s.png" % prefix, sigma, cmap='Greys_r')
# save_time = time.time()-start_time
# print("Save image as %s.png file" % prefix)
# print("Save Time : %f" % save_time)
def ImageOutput(sigma, prefix, Colors):
from PIL import Image
Max = sigma.max()
Min = sigma.min()
# Normalize value as 8bits Integer
SigmaInt = (255 * (sigma - Min) / (Max - Min)).astype("uint8")
image = Image.fromarray(SigmaInt)
image.save("%s.jpg" % prefix)
def BlackHoleCL(zImage, fImage, InputCL):
Device = InputCL["Device"]
Mass = InputCL["Mass"]
InternalRadius = InputCL["InternalRadius"]
ExternalRadius = InputCL["ExternalRadius"]
Angle = InputCL["Angle"]
Method = InputCL["Method"]
TrackPoints = InputCL["TrackPoints"]
Physics = InputCL["Physics"]
NoImage = InputCL["NoImage"]
TrackSave = InputCL["TrackSave"]
PhysicsList = DictionariesAPI()
if InputCL["BlackBody"]:
# Spectrum is Black Body one
Line = 0
else:
# Spectrum is Monochromatic Line one
Line = 1
Trajectories = numpy.zeros(
(int(InputCL["Size"] / 2), InputCL["TrackPoints"]), dtype=numpy.float32
)
IdLast = numpy.zeros(int(InputCL["Size"] / 2), dtype=numpy.int32)
# Je detecte un peripherique GPU dans la liste des peripheriques
Id = 0
HasXPU = False
for platform in cl.get_platforms():
for device in platform.get_devices():
if Id == Device:
PF4XPU = platform.name
XPU = device
print("CPU/GPU selected: ", device.name.lstrip())
HasXPU = True
Id += 1
if not HasXPU:
print("No XPU #%i found in all of %i devices, sorry..." % (Device, Id - 1))
sys.exit()
ctx = cl.Context([XPU])
queue = cl.CommandQueue(
ctx, properties=cl.command_queue_properties.PROFILING_ENABLE
)
BuildOptions = "-DPHYSICS=%i -DSETTRACKPOINTS=%i " % (
PhysicsList[Physics],
InputCL["TrackPoints"],
)
print("My Platform is ", PF4XPU)
if (
"Intel" in PF4XPU
or "Experimental" in PF4XPU
or "Clover" in PF4XPU
or "Portable" in PF4XPU
):
print("No extra options for Intel and Clover!")
else:
BuildOptions = BuildOptions + " -cl-mad-enable"
BlackHoleCL = cl.Program(ctx, BlobOpenCL).build(options=BuildOptions)
# Je recupere les flag possibles pour les buffers
mf = cl.mem_flags
if Method == "TrajectoPixel" or Method == "TrajectoCircle":
TrajectoriesCL = cl.Buffer(
ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=Trajectories
)
IdLastCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=IdLast)
zImageCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=zImage)
fImageCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=fImage)
start_time = time.time()
if Method == "EachPixel":
CLLaunch = BlackHoleCL.EachPixel(
queue,
(zImage.shape[0], zImage.shape[1]),
None,
zImageCL,
fImageCL,
numpy.float32(Mass),
numpy.float32(InternalRadius),
numpy.float32(ExternalRadius),
numpy.float32(Angle),
numpy.int32(Line),
)
CLLaunch.wait()
elif Method == "Original":
CLLaunch = BlackHoleCL.Original(
queue,
(1,),
None,
zImageCL,
fImageCL,
numpy.uint32(zImage.shape[0] / 2),
numpy.float32(Mass),
numpy.float32(InternalRadius),
numpy.float32(ExternalRadius),
numpy.float32(Angle),
numpy.int32(Line),
)
CLLaunch.wait()
elif Method == "EachCircle":
CLLaunch = BlackHoleCL.EachCircle(
queue,
(int(zImage.shape[0] / 2),),
None,
zImageCL,
fImageCL,
numpy.float32(Mass),
numpy.float32(InternalRadius),
numpy.float32(ExternalRadius),
numpy.float32(Angle),
numpy.int32(Line),
)
CLLaunch.wait()
elif Method == "TrajectoCircle":
CLLaunch = BlackHoleCL.Trajectory(
queue,
(Trajectories.shape[0],),
None,
TrajectoriesCL,
IdLastCL,
numpy.float32(Mass),
numpy.float32(InternalRadius),
numpy.float32(ExternalRadius),
numpy.float32(Angle),
numpy.int32(Line),
)
CLLaunch = BlackHoleCL.Circle(
queue,
(Trajectories.shape[0], int(zImage.shape[0] * 4)),
None,
TrajectoriesCL,
IdLastCL,
zImageCL,
fImageCL,
numpy.float32(Mass),
numpy.float32(InternalRadius),
numpy.float32(ExternalRadius),
numpy.float32(Angle),
numpy.int32(Line),
)
CLLaunch.wait()
else:
CLLaunch = BlackHoleCL.Trajectory(
queue,
(Trajectories.shape[0],),
None,
TrajectoriesCL,
IdLastCL,
numpy.float32(Mass),
numpy.float32(InternalRadius),
numpy.float32(ExternalRadius),
numpy.float32(Angle),
numpy.int32(Line),
)
CLLaunch = BlackHoleCL.Pixel(
queue,
(zImage.shape[0], zImage.shape[1]),
None,
zImageCL,
fImageCL,
TrajectoriesCL,
IdLastCL,
numpy.uint32(Trajectories.shape[0]),
numpy.float32(Mass),
numpy.float32(InternalRadius),
numpy.float32(ExternalRadius),
numpy.float32(Angle),
numpy.int32(Line),
)
CLLaunch.wait()
compute = time.time() - start_time
cl.enqueue_copy(queue, zImage, zImageCL).wait()
cl.enqueue_copy(queue, fImage, fImageCL).wait()
if Method == "TrajectoPixel" or Method == "TrajectoCircle":
cl.enqueue_copy(queue, Trajectories, TrajectoriesCL).wait()
cl.enqueue_copy(queue, IdLast, IdLastCL).wait()
elapsed = time.time() - start_time
print("\nCompute Time : %f" % compute)
print("Elapsed Time : %f\n" % elapsed)
zMaxPosition = numpy.where(zImage[:, :] == zImage.max())
fMaxPosition = numpy.where(fImage[:, :] == fImage.max())
print(
"Z max @(%f,%f) : %f"
% (
(
1.0 * zMaxPosition[1][0] / zImage.shape[1] - 0.5,
1.0 * zMaxPosition[0][0] / zImage.shape[0] - 0.5,
zImage.max(),
)
)
)
print(
"Flux max @(%f,%f) : %f"
% (
(
1.0 * fMaxPosition[1][0] / fImage.shape[1] - 0.5,
1.0 * fMaxPosition[0][0] / fImage.shape[0] - 0.5,
fImage.max(),
)
)
)
zImageCL.release()
fImageCL.release()
if Method == "TrajectoPixel" or Method == "TrajectoCircle":
if not NoImage:
AngleStep = 4 * numpy.pi / TrackPoints
Angles = numpy.arange(0.0, 4 * numpy.pi, AngleStep)
Angles.shape = (1, TrackPoints)
if TrackSave:
# numpy.savetxt("TrouNoirTrajectories_%s.csv" % ImageInfo,
# numpy.transpose(numpy.concatenate((Angles,Trajectories),axis=0)),
# delimiter=' ', fmt='%.2e')
numpy.savetxt(
"TrouNoirTrajectories.csv",
numpy.transpose(numpy.concatenate((Angles, Trajectories),
axis=0)),
delimiter=" ",
fmt="%.2e",
)
TrajectoriesCL.release()
IdLastCL.release()
return elapsed
def BlackHoleCUDA(zImage, fImage, InputCL):
Device = InputCL["Device"]
Mass = InputCL["Mass"]
InternalRadius = InputCL["InternalRadius"]
ExternalRadius = InputCL["ExternalRadius"]
Angle = InputCL["Angle"]
Method = InputCL["Method"]
TrackPoints = InputCL["TrackPoints"]
Physics = InputCL["Physics"]
Threads = InputCL["Threads"]
PhysicsList = DictionariesAPI()
if InputCL["BlackBody"]:
# Spectrum is Black Body one
Line = 0
else:
# Spectrum is Monochromatic Line one
Line = 1
Trajectories = numpy.zeros(
(int(InputCL["Size"] / 2), InputCL["TrackPoints"]), dtype=numpy.float32
)
IdLast = numpy.zeros(int(InputCL["Size"] / 2), dtype=numpy.int32)
try:
# For PyCUDA import
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
cuda.init()
for Id in range(cuda.Device.count()):
if Id == Device:
XPU = cuda.Device(Id)
print("GPU selected %s" % XPU.name())
print
except ImportError:
print("Platform does not seem to support CUDA")
Context = XPU.make_context()
try:
mod = SourceModule(
KernelCodeCuda(),
options=[
"--compiler-options",
"-DPHYSICS=%i -DSETTRACKPOINTS=%i"
% (PhysicsList[Physics], TrackPoints),
],
)
print("Compilation seems to be OK")
except Exception:
print("Compilation seems to break")
EachPixelCU = mod.get_function("EachPixel")
OriginalCU = mod.get_function("Original")
EachCircleCU = mod.get_function("EachCircle")
TrajectoryCU = mod.get_function("Trajectory")
PixelCU = mod.get_function("Pixel")
CircleCU = mod.get_function("Circle")
TrajectoriesCU = cuda.mem_alloc(Trajectories.size * Trajectories.dtype.itemsize)
cuda.memcpy_htod(TrajectoriesCU, Trajectories)
zImageCU = cuda.mem_alloc(zImage.size * zImage.dtype.itemsize)
cuda.memcpy_htod(zImageCU, zImage)
fImageCU = cuda.mem_alloc(fImage.size * fImage.dtype.itemsize)
cuda.memcpy_htod(zImageCU, fImage)
IdLastCU = cuda.mem_alloc(IdLast.size * IdLast.dtype.itemsize)
cuda.memcpy_htod(IdLastCU, IdLast)
start_time = time.time()
if Method == "EachPixel":
EachPixelCU(
zImageCU,
fImageCU,
numpy.float32(Mass),
numpy.float32(InternalRadius),
numpy.float32(ExternalRadius),
numpy.float32(Angle),
numpy.int32(Line),
grid=(int(zImage.shape[0] / Threads), int(zImage.shape[1] / Threads)),
block=(Threads, Threads, 1),
)
elif Method == "EachCircle":
EachCircleCU(
zImageCU,
fImageCU,
numpy.float32(Mass),
numpy.float32(InternalRadius),
numpy.float32(ExternalRadius),
numpy.float32(Angle),
numpy.int32(Line),
grid=(int(zImage.shape[0] / Threads / 2), 1),
block=(Threads, 1, 1),
)
elif Method == "Original":
OriginalCU(
zImageCU,
fImageCU,
numpy.uint32(zImage.shape[0] / 2),
numpy.float32(Mass),
numpy.float32(InternalRadius),
numpy.float32(ExternalRadius),
numpy.float32(Angle),
numpy.int32(Line),
grid=(1, 1),
block=(1, 1, 1),
)
elif Method == "TrajectoCircle":
TrajectoryCU(
TrajectoriesCU,
IdLastCU,
numpy.float32(Mass),
numpy.float32(InternalRadius),
numpy.float32(ExternalRadius),
numpy.float32(Angle),
numpy.int32(Line),
grid=(int(Trajectories.shape[0] / Threads), 1),
block=(Threads, 1, 1),
)
CircleCU(
TrajectoriesCU,
IdLastCU,
zImageCU,
fImageCU,
numpy.float32(Mass),
numpy.float32(InternalRadius),
numpy.float32(ExternalRadius),
numpy.float32(Angle),
numpy.int32(Line),
grid=(
int(Trajectories.shape[0] / Threads),
int(zImage.shape[0] * 4 / Threads),
),
block=(Threads, Threads, 1),
)
else:
# Default method: TrajectoPixel
TrajectoryCU(
TrajectoriesCU,
IdLastCU,
numpy.float32(Mass),
numpy.float32(InternalRadius),
numpy.float32(ExternalRadius),
numpy.float32(Angle),
numpy.int32(Line),
grid=(int(Trajectories.shape[0] / Threads), 1),
block=(Threads, 1, 1),
)
PixelCU(
zImageCU,
fImageCU,
TrajectoriesCU,
IdLastCU,
numpy.uint32(Trajectories.shape[0]),
numpy.float32(Mass),
numpy.float32(InternalRadius),
numpy.float32(ExternalRadius),
numpy.float32(Angle),
numpy.int32(Line),
grid=(int(zImage.shape[0] / Threads), int(zImage.shape[1] / Threads), 1),
block=(Threads, Threads, 1),
)
Context.synchronize()
compute = time.time() - start_time
cuda.memcpy_dtoh(zImage, zImageCU)
cuda.memcpy_dtoh(fImage, fImageCU)
if Method == "TrajectoPixel" or Method == "TrajectoCircle":
cuda.memcpy_dtoh(Trajectories, TrajectoriesCU)
elapsed = time.time() - start_time
print("\nCompute Time : %f" % compute)
print("Elapsed Time : %f\n" % elapsed)
zMaxPosition = numpy.where(zImage[:, :] == zImage.max())
fMaxPosition = numpy.where(fImage[:, :] == fImage.max())
print(
"Z max @(%f,%f) : %f"
% (
(
1.0 * zMaxPosition[1][0] / zImage.shape[1] - 0.5,
1.0 * zMaxPosition[0][0] / zImage.shape[0] - 0.5,
zImage.max(),
)
)
)
print(
"Flux max @(%f,%f) : %f"
% (
(
1.0 * fMaxPosition[1][0] / fImage.shape[1] - 0.5,
1.0 * fMaxPosition[0][0] / fImage.shape[0] - 0.5,
fImage.max(),
)
)
)
Context.pop()
Context.detach()
if Method == "TrajectoPixel" or Method == "TrajectoCircle":
if not NoImage:
AngleStep = 4 * numpy.pi / TrackPoints
Angles = numpy.arange(0.0, 4 * numpy.pi, AngleStep)
Angles.shape = (1, TrackPoints)
# numpy.savetxt("TrouNoirTrajectories_%s.csv" % ImageInfo,
# numpy.transpose(numpy.concatenate((Angles,Trajectories),axis=0)),
# delimiter=' ', fmt='%.2e')
numpy.savetxt(
"TrouNoirTrajectories.csv",
numpy.transpose(numpy.concatenate((Angles, Trajectories), axis=0)),
delimiter=" ",
fmt="%.2e",
)
return elapsed
if __name__ == "__main__":
# Default device: first one!
Device = 0
# Default implementation: OpenCL, most versatile!
GpuStyle = "OpenCL"
Mass = 1.0
# Internal Radius 3 times de Schwarzschild Radius
InternalRadius = 6.0 * Mass
#
ExternalRadius = 12.0
#
# Angle with normal to disc 10 degrees
Angle = numpy.pi / 180.0 * (90.0 - 10.0)
# Radiation of disc : BlackBody or Monochromatic
BlackBody = False
# Size of image
Size = 1024
# Variable Type
VariableType = "FP32"
# ?
q = -2
# Method of resolution
Method = "TrajectoPixel"
# Colors for output image
Colors = "Greyscale"
# Physics
Physics = "Einstein"
# No output as image
NoImage = False
# Threads in CUDA
Threads = 32
# Trackpoints of trajectories
TrackPoints = 2048
# Tracksave of trajectories
TrackSave = False
HowToUse = "%s -h [Help] -b [BlackBodyEmission] -j [TrackSave] -n [NoImage] -p <Einstein/Newton> -s <SizeInPixels> -m <Mass> -i <DiscInternalRadius> -x <DiscExternalRadius> -a <AngleAboveDisc> -d <DeviceId> -c <Greyscale/Red2Yellow> -g <CUDA/OpenCL> -o <EachPixel/TrajectoCircle/TrajectoPixel/EachCircle/Original> -t <ThreadsInCuda> -v <FP32/FP64> -k <TrackPoints>" # noqa: E501
try:
opts, args = getopt.getopt(
sys.argv[1:],
"hbnjs:m:i:x:a:d:g:v:o:t:c:p:k:",
[
"tracksave",
"blackbody",
"noimage",
"camera",
"size=",
"mass=",
"internal=",
"external=",
"angle=",
"device=",
"gpustyle=",
"variabletype=",
"method=",
"threads=",
"colors=",
"physics=",
"trackpoints=",
],
)
except getopt.GetoptError:
print(HowToUse % sys.argv[0])
sys.exit(2)
# List of Devices
Devices = []
Alu = {}
for opt, arg in opts:
if opt == "-h":
print(HowToUse % sys.argv[0])
print("\nInformations about devices detected under OpenCL API:")
# For PyOpenCL import
try:
Id = 0
for platform in cl.get_platforms():
for device in platform.get_devices():
# deviceType=cl.device_type.to_string(device.type)
deviceType = "xPU"
print(
"Device #%i from %s of type %s : %s"
% (
Id,
platform.vendor.lstrip(),
deviceType,
device.name.lstrip(),
)
)
Id = Id + 1
except Exception:
print("Your platform does not seem to support OpenCL")
print("\nInformations about devices detected under CUDA API:")
# For PyCUDA import
try:
import pycuda.driver as cuda
cuda.init()
for Id in range(cuda.Device.count()):
device = cuda.Device(Id)
print("Device #%i of type GPU : %s" % (Id, device.name()))
print
except Exception:
print("Your platform does not seem to support CUDA")
sys.exit()
elif opt in ("-d", "--device"):
# Devices.append(int(arg))
Device = int(arg)
elif opt in ("-g", "--gpustyle"):
GpuStyle = arg
elif opt in ("-v", "--variabletype"):
VariableType = arg
elif opt in ("-s", "--size"):
Size = int(arg)
elif opt in ("-k", "--trackpoints"):
TrackPoints = int(arg)
elif opt in ("-m", "--mass"):
Mass = float(arg)
elif opt in ("-i", "--internal"):
InternalRadius = float(arg)
elif opt in ("-e", "--external"):
ExternalRadius = float(arg)
elif opt in ("-a", "--angle"):
Angle = numpy.pi / 180.0 * (90.0 - float(arg))
elif opt in ("-b", "--blackbody"):
BlackBody = True
elif opt in ("-j", "--tracksave"):
TrackSave = True
elif opt in ("-n", "--noimage"):
NoImage = True
elif opt in ("-o", "--method"):
Method = arg
elif opt in ("-t", "--threads"):
Threads = int(arg)
elif opt in ("-c", "--colors"):
Colors = arg
elif opt in ("-p", "--physics"):
Physics = arg
print("Device Identification selected : %s" % Device)
print("GpuStyle used : %s" % GpuStyle)
print("VariableType : %s" % VariableType)
print("Size : %i" % Size)
print("Mass : %f" % Mass)
print("Internal Radius : %f" % InternalRadius)
print("External Radius : %f" % ExternalRadius)
print("Angle with normal of (in radians) : %f" % Angle)
print("Black Body Disc Emission (monochromatic instead) : %s" % BlackBody)
print("Method of resolution : %s" % Method)
print("Colors for output images : %s" % Colors)
print("Physics used for Trajectories : %s" % Physics)
print("Trackpoints of Trajectories : %i" % TrackPoints)
print("Tracksave of Trajectories : %i" % TrackSave)
if GpuStyle == "CUDA":
print("\nSelection of CUDA device")
try:
# For PyCUDA import
import pycuda.driver as cuda
cuda.init()
for Id in range(cuda.Device.count()):
device = cuda.Device(Id)
print("Device #%i of type GPU : %s" % (Id, device.name()))
if Id in Devices:
Alu[Id] = "GPU"
except ImportError:
print("Platform does not seem to support CUDA")
if GpuStyle == "OpenCL":
print("\nSelection of OpenCL device")
try:
# For PyOpenCL import
import pyopencl as cl
Id = 0
for platform in cl.get_platforms():
for device in platform.get_devices():
# deviceType=cl.device_type.to_string(device.type)
deviceType = "xPU"
print(
"Device #%i from %s of type %s : %s"
% (
Id,
platform.vendor.lstrip().rstrip(),
deviceType,
device.name.lstrip().rstrip(),
)
)
if Id in Devices:
# Set the Alu as detected Device Type
Alu[Id] = deviceType
Id = Id + 1
except ImportError:
print("Platform does not seem to support OpenCL")
zImage = numpy.zeros((Size, Size), dtype=numpy.float32)
fImage = numpy.zeros((Size, Size), dtype=numpy.float32)
InputCL = {}
InputCL["Device"] = Device
InputCL["GpuStyle"] = GpuStyle
InputCL["VariableType"] = VariableType
InputCL["Size"] = Size
InputCL["Mass"] = Mass
InputCL["InternalRadius"] = InternalRadius
InputCL["ExternalRadius"] = ExternalRadius
InputCL["Angle"] = Angle
InputCL["BlackBody"] = BlackBody
InputCL["Method"] = Method
InputCL["TrackPoints"] = TrackPoints
InputCL["Physics"] = Physics
InputCL["Threads"] = Threads
InputCL["NoImage"] = NoImage
InputCL["TrackSave"] = TrackSave
if GpuStyle == "OpenCL":
duration = BlackHoleCL(zImage, fImage, InputCL)
else:
duration = BlackHoleCUDA(zImage, fImage, InputCL)
Hostname = gethostname()
Date = time.strftime("%Y%m%d_%H%M%S")
ImageInfo = "%s_Device%i_%s_%s" % (Method, Device, Hostname, Date)
if not NoImage:
ImageOutput(zImage, "TrouNoirZ_%s" % ImageInfo, Colors)
ImageOutput(fImage, "TrouNoirF_%s" % ImageInfo, Colors)