Skip to content
Snippets Groups Projects
black-hole-accretion.py 53.47 KiB
#!/usr/bin/env python3
#
# TrouNoir model using PyOpenCL or PyCUDA
#
# CC BY-NC-SA 2019 : <emmanuel.quemener@ens-lyon.fr>
#
# Part of matrix programs from: https://forge.cbp.ens-lyon.fr/svn/bench4gpu/
#
# Thanks to Andreas Klockner for PyOpenCL and PyCUDA:
# http://mathema.tician.de/software/pyopencl
#
# Original code programmed in Fortran 77 in mars 1994
# for Practical Work of Numerical Simulation
# DEA (old Master2) in astrophysics and spatial techniques in Meudon
# by Herve Aussel & Emmanuel Quemener
#
# Conversion in C done by Emmanuel Quemener in august 1997
# GPUfication in OpenCL under Python in july 2019
# GPUfication in CUDA under Python in august 2019
#
# Thanks to :
#
# - Herve Aussel for his part of code of black body spectrum
# - Didier Pelat for his help to perform this work
# - Jean-Pierre Luminet for his article published in 1979
# - Numerical Recipies for Runge Kutta recipies
# - Luc Blanchet for his disponibility about my questions in General Relativity
# - Pierre Lena for his passion about science and vulgarisation

# If crash on OpenCL Intel implementation, add following options and force
# export PYOPENCL_COMPILER_OUTPUT=1
# export CL_CONFIG_USE_VECTORIZER=True
# export CL_CONFIG_CPU_VECTORIZER_MODE=16

import pyopencl as cl
import numpy
import time
import sys
import getopt
from socket import gethostname


def DictionariesAPI():
    PhysicsList = {"Einstein": 0, "Newton": 1}
    return PhysicsList


#
# Blank space below to simplify debugging on OpenCL code
#


BlobOpenCL = """

#define PI (float)3.14159265359e0f
#define nbr 256

#define EINSTEIN 0
#define NEWTON 1

#ifdef SETTRACKPOINTS
#define TRACKPOINTS SETTRACKPOINTS
#else
#define TRACKPOINTS 2048
#endif

float atanp(float x,float y)
{
  float angle;

  angle=atan2(y,x);

  if (angle<0.e0f)
    {
      angle+=(float)2.e0f*PI;
    }

  return angle;
}

float f(float v)
{
  return v;
}

#if PHYSICS == NEWTON
float g(float u,float m,float b)
{
  return (-u);
}
#else
float g(float u,float m,float b)
{
  return (3.e0f*m/b*pow(u,2)-u);
}
#endif

void calcul(float *us,float *vs,float up,float vp,
            float h,float m,float b)
{
  float c0,c1,c2,c3,d0,d1,d2,d3;

  c0=h*f(vp);
  c1=h*f(vp+c0/2.e0f);
  c2=h*f(vp+c1/2.e0f);
  c3=h*f(vp+c2);
  d0=h*g(up,m,b);
  d1=h*g(up+d0/2.e0f,m,b);
  d2=h*g(up+d1/2.e0f,m,b);
  d3=h*g(up+d2,m,b);

  *us=up+(c0+2.e0f*c1+2.e0f*c2+c3)/6.e0f;
  *vs=vp+(d0+2.e0f*d1+2.e0f*d2+d3)/6.e0f;
}

void rungekutta(float *ps,float *us,float *vs,
                float pp,float up,float vp,
                float h,float m,float b)
{
  calcul(us,vs,up,vp,h,m,b);
  *ps=pp+h;
}

float decalage_spectral(float r,float b,float phi,
                        float tho,float m)
{
  return (sqrt(1-3*m/r)/(1+sqrt(m/pow(r,3))*b*sin(tho)*sin(phi)));
}

float spectre(float rf,int q,float b,float db,
              float h,float r,float m,float bss)
{
  float flx;

//  flx=exp(q*log(r/m))*pow(rf,4)*b*db*h;
  flx=exp(q*log(r/m)+4.e0f*log(rf))*b*db*h;
  return(flx);
}

float spectre_cn(float rf32,float b32,float db32,
                 float h32,float r32,float m32,float bss32)
{

#define MYFLOAT float

  MYFLOAT rf=(MYFLOAT)(rf32);
  MYFLOAT b=(MYFLOAT)(b32);
  MYFLOAT db=(MYFLOAT)(db32);
  MYFLOAT h=(MYFLOAT)(h32);
  MYFLOAT r=(MYFLOAT)(r32);
  MYFLOAT m=(MYFLOAT)(m32);
  MYFLOAT bss=(MYFLOAT)(bss32);

  MYFLOAT flx;
  MYFLOAT nu_rec,nu_em,qu,temp_em,flux_int;
  int fi,posfreq;

#define planck 6.62e-34f
#define k 1.38e-23f
#define c2 9.e16f
#define temp 3.e7f
#define m_point 1.e0f

#define lplanck (log(6.62e0f)-34.e0f*log(10.e0f))
#define lk (log(1.38e0f)-23.e0f*log(10.e0f))
#define lc2 (log(9.e0f)+16.e0f*log(10.e0f))

  MYFLOAT v=1.e0f-3.e0f/r;

  qu=1.e0f/sqrt((1.e0f-3.e0f/r)*r)*(sqrt(r)-sqrt(6.e0f)+sqrt(3.e0f)/2.e0f*log((sqrt(r)+sqrt(3.e0f))/(sqrt(r)-sqrt(3.e0f))* 0.17157287525380988e0f ));  // # noqa: E501

  temp_em=temp*sqrt(m)*exp(0.25e0f*log(m_point)-0.75e0f*log(r)-0.125e0f*log(v)+0.25e0f*log(fabs(qu)));

  flux_int=0.e0f;
  flx=0.e0f;

  for (fi=0;fi<nbr;fi++)
    {
      nu_em=bss*(MYFLOAT)fi/(MYFLOAT)nbr;
      nu_rec=nu_em*rf;
      posfreq=(int)(nu_rec*(MYFLOAT)nbr/bss);
      if ((posfreq>0)&&(posfreq<nbr))
        {
          // Initial version
          // flux_int=2.*planck/c2*pow(nu_em,3)/(exp(planck*nu_em/(k*temp_em))-1.);
          // Version with log used
          //flux_int=2.*exp(lplanck-lc2+3.*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.);
          // flux_int*=pow(rf,3)*b*db*h;
          //flux_int*=exp(3.e0f*log(rf))*b*db*h;
          flux_int=2.e0f*exp(lplanck-lc2+3.e0f*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.e0f)*exp(3.e0f*log(rf))*b*db*h;

          flx+=flux_int;
        }
    }

  return((float)(flx));
}

void impact(float phi,float r,float b,float tho,float m,
            float *zp,float *fp,
            int q,float db,
            float h,int raie)
{
  float flx,rf,bss;

  rf=decalage_spectral(r,b,phi,tho,m);

  if (raie==0)
    {
      bss=1.e19f;
      flx=spectre_cn(rf,b,db,h,r,m,bss);
    }
  else
    {
      bss=2.e0f;
      flx=spectre(rf,q,b,db,h,r,m,bss);
    }

  *zp=1.e0f/rf;
  *fp=flx;

}

__kernel void EachPixel(__global float *zImage,__global float *fImage,
                        float Mass,float InternalRadius,
                        float ExternalRadius,float Angle,
                        int Line)
{
   uint xi=(uint)get_global_id(0);
   uint yi=(uint)get_global_id(1);
   uint sizex=(uint)get_global_size(0);
   uint sizey=(uint)get_global_size(1);

   // Perform trajectory for each pixel, exit on hit

  float m,rs,ri,re,tho;
  int q,raie;

  m=Mass;
  rs=2.e0f*m;
  ri=InternalRadius;
  re=ExternalRadius;
  tho=Angle;
  q=-2;
  raie=Line;

  float bmx,db,b,h;
  float rp0,rps;
  float phi,phd;
  uint nh=0;
  float zp=0.e0f,fp=0.e0f;

  // Autosize for image
  bmx=1.25e0f*re;

  h=4.e0f*PI/(float)TRACKPOINTS;

  // set origin as center of image
  float x=(float)xi-(float)(sizex/2)+(float)5.e-1f;
  float y=(float)yi-(float)(sizey/2)+(float)5.e-1f;
  // angle extracted from cylindric symmetry
  phi=atanp(x,y);
  phd=atanp(cos(phi)*sin(tho),cos(tho));


  float up,vp,pp,us,vs,ps;

  // impact parameter
  b=sqrt(x*x+y*y)*(float)2.e0f/(float)sizex*bmx;
  // step of impact parameter;
  db=bmx/(float)(sizex);

  up=0.e0f;
  vp=1.e0f;
  pp=0.e0f;

  rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);

  rps=fabs(b/us);
  rp0=rps;

  int ExitOnImpact=0;

  do
  {
     nh++;
     pp=ps;
     up=us;
     vp=vs;
     rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
     rps=fabs(b/us);
     ExitOnImpact = ((fmod(pp,PI)<fmod(phd,PI))&&(fmod(ps,PI)>fmod(phd,PI)))&&(rps>=ri)&&(rps<=re)?1:0;

  } while ((rps>=rs)&&(rps<=rp0)&&(ExitOnImpact==0)&&(nh<TRACKPOINTS));


  if (ExitOnImpact==1) {
     impact(phi,rps,b,tho,m,&zp,&fp,q,db,h,raie);
  }
  else
  {
     zp=0.e0f;
     fp=0.e0f;
  }

  barrier(CLK_GLOBAL_MEM_FENCE);

  zImage[yi+sizex*xi]=(float)zp;
  fImage[yi+sizex*xi]=(float)fp;
}

__kernel void Pixel(__global float *zImage,__global float *fImage,
                    __global float *Trajectories,__global int *IdLast,
                    uint ImpactParameter,
                    float Mass,float InternalRadius,
                    float ExternalRadius,float Angle,
                    int Line)
{
   uint xi=(uint)get_global_id(0);
   uint yi=(uint)get_global_id(1);
   uint sizex=(uint)get_global_size(0);
   uint sizey=(uint)get_global_size(1);

   // Perform trajectory for each pixel

  float m,ri,re,tho;
  int q,raie;

  m=Mass;
  ri=InternalRadius;
  re=ExternalRadius;
  tho=Angle;
  q=-2;
  raie=Line;

  float bmx,db,b,h;
  float phi,phd,php,nr,r;
  float zp=0.e0f,fp=0.e0f;

  // Autosize for image, 25% greater than external radius
  bmx=1.25e0f*re;

  // Angular step of integration
  h=4.e0f*PI/(float)TRACKPOINTS;

  // Step of Impact Parameter
  db=bmx/(2.e0f*(float)ImpactParameter);

  // set origin as center of image
  float x=(float)xi-(float)(sizex/2)+(float)5.e-1f;
  float y=(float)yi-(float)(sizey/2)+(float)5.e-1f;

  // angle extracted from cylindric symmetry
  phi=atanp(x,y);
  phd=atanp(cos(phi)*sin(tho),cos(tho));

  // Real Impact Parameter
  b=sqrt(x*x+y*y)*bmx/(float)ImpactParameter;

  // Integer Impact Parameter
  uint bi=(uint)sqrt(x*x+y*y);

  int HalfLap=0,ExitOnImpact=0,ni;

  if (bi<ImpactParameter)
  {
    do
    {
      php=phd+(float)HalfLap*PI;
      nr=php/h;
      ni=(int)nr;

      if (ni<IdLast[bi])
      {
        r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni];
      }
      else
      {
        r=Trajectories[bi*TRACKPOINTS+ni];
      }

      if ((r<=re)&&(r>=ri))
      {
        ExitOnImpact=1;
        impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
      }

      HalfLap++;
    } while ((HalfLap<=2)&&(ExitOnImpact==0));

  }

  barrier(CLK_GLOBAL_MEM_FENCE);

  zImage[yi+sizex*xi]=zp;
  fImage[yi+sizex*xi]=fp;
}

__kernel void Circle(__global float *Trajectories,__global int *IdLast,
                     __global float *zImage,__global float *fImage,
                     float Mass,float InternalRadius,
                     float ExternalRadius,float Angle,
                     int Line)
{
   // Integer Impact Parameter ID
   int bi=get_global_id(0);
   // Integer points on circle
   int i=get_global_id(1);
   // Integer Impact Parameter Size (half of image)
   int bmaxi=get_global_size(0);
   // Integer Points on circle
   int imx=get_global_size(1);

   // Perform trajectory for each pixel

  float m,ri,re,tho;
  int q,raie;

  m=Mass;
  ri=InternalRadius;
  re=ExternalRadius;
  tho=Angle;
  raie=Line;

  float bmx,db,b,h;
  float phi,phd;
  float zp=0.e0f,fp=0.e0f;

  // Autosize for image
  bmx=1.25e0f*re;

  // Angular step of integration
  h=4.e0f*PI/(float)TRACKPOINTS;

  // impact parameter
  b=(float)bi/(float)bmaxi*bmx;
  db=bmx/(2.e0f*(float)bmaxi);

  phi=2.e0f*PI/(float)imx*(float)i;
  phd=atanp(cos(phi)*sin(tho),cos(tho));
  int yi=(int)((float)bi*sin(phi))+bmaxi;
  int xi=(int)((float)bi*cos(phi))+bmaxi;

  int HalfLap=0,ExitOnImpact=0,ni;
  float php,nr,r;

  do
  {
     php=phd+(float)HalfLap*PI;
     nr=php/h;
     ni=(int)nr;

     if (ni<IdLast[bi])
     {
        r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni];
     }
     else
     {
        r=Trajectories[bi*TRACKPOINTS+ni];
     }

     if ((r<=re)&&(r>=ri))
     {
        ExitOnImpact=1;
        impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
     }

     HalfLap++;
  } while ((HalfLap<=2)&&(ExitOnImpact==0));

  zImage[yi+2*bmaxi*xi]=zp;
  fImage[yi+2*bmaxi*xi]=fp;

  barrier(CLK_GLOBAL_MEM_FENCE);

}

__kernel void Trajectory(__global float *Trajectories,__global int *IdLast,
                         float Mass,float InternalRadius,
                         float ExternalRadius,float Angle,
                         int Line)
{
  // Integer Impact Parameter ID
  int bi=get_global_id(0);
  // Integer Impact Parameter Size (half of image)
  int bmaxi=get_global_size(0);

  // Perform trajectory for each pixel

  float m,rs,re;

  m=Mass;
  rs=2.e0f*m;
  re=ExternalRadius;

  float bmx,b,h;
  int nh;

  // Autosize for image
  bmx=1.25e0f*re;

  // Angular step of integration
  h=4.e0f*PI/(float)TRACKPOINTS;

  // impact parameter
  b=(float)bi/(float)bmaxi*bmx;

  float up,vp,pp,us,vs,ps;

  up=0.e0f;
  vp=1.e0f;

  pp=0.e0f;
  nh=0;

  rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);

  // b versus us
  float bvus=fabs(b/us);
  float bvus0=bvus;
  Trajectories[bi*TRACKPOINTS+nh]=bvus;

  do
  {
     nh++;
     pp=ps;
     up=us;
     vp=vs;
     rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
     bvus=fabs(b/us);
     Trajectories[bi*TRACKPOINTS+nh]=bvus;

  } while ((bvus>=rs)&&(bvus<=bvus0));

  IdLast[bi]=nh;

  barrier(CLK_GLOBAL_MEM_FENCE);

}

__kernel void EachCircle(__global float *zImage,__global float *fImage,
                         float Mass,float InternalRadius,
                         float ExternalRadius,float Angle,
                         int Line)
{
   // Integer Impact Parameter ID
   uint bi=(uint)get_global_id(0);
   // Integer Impact Parameter Size (half of image)
   uint bmaxi=(uint)get_global_size(0);

  private float Trajectory[TRACKPOINTS];

  float m,rs,ri,re,tho;
  int raie,q;

  m=Mass;
  rs=2.e0f*m;
  ri=InternalRadius;
  re=ExternalRadius;
  tho=Angle;
  q=-2;
  raie=Line;

  float bmx,db,b,h;
  uint nh;


  // Autosize for image
  bmx=1.25e0f*re;

  // Angular step of integration
  h=4.e0f*PI/(float)TRACKPOINTS;

  // impact parameter
  b=(float)bi/(float)bmaxi*bmx;
  db=bmx/(2.e0f*(float)bmaxi);

  float up,vp,pp,us,vs,ps;

  up=0.e0f;
  vp=1.e0f;

  pp=0.e0f;
  nh=0;

  rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);

  // b versus us
  float bvus=fabs(b/us);
  float bvus0=bvus;
  Trajectory[nh]=bvus;

  do
  {
     nh++;
     pp=ps;
     up=us;
     vp=vs;
     rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
     bvus=(float)fabs(b/us);
     Trajectory[nh]=bvus;

  } while ((bvus>=rs)&&(bvus<=bvus0));


  for (uint i=(uint)nh+1;i<TRACKPOINTS;i++) {
     Trajectory[i]=0.e0f;
  }


  uint imx=(uint)(16*bi);

  for (uint i=0;i<imx;i++)
  {
     float zp=0.e0f,fp=0.e0f;
     float phi=2.e0f*PI/(float)imx*(float)i;
     float phd=atanp(cos(phi)*sin(tho),cos(tho));
     uint yi=(uint)((float)bi*sin(phi)+bmaxi);
     uint xi=(uint)((float)bi*cos(phi)+bmaxi);

     uint HalfLap=0,ExitOnImpact=0,ni;
     float php,nr,r;

     do
     {
        php=phd+(float)HalfLap*PI;
        nr=php/h;
        ni=(int)nr;

        if (ni<nh)
        {
           r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.e0f)+Trajectory[ni];
        }
        else
        {
           r=Trajectory[ni];
        }

        if ((r<=re)&&(r>=ri))
        {
           ExitOnImpact=1;
           impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
        }

        HalfLap++;

     } while ((HalfLap<=2)&&(ExitOnImpact==0));

     zImage[yi+2*bmaxi*xi]=zp;
     fImage[yi+2*bmaxi*xi]=fp;

  }

  barrier(CLK_GLOBAL_MEM_FENCE);

}

__kernel void Original(__global float *zImage,__global float *fImage,
                       uint Size,float Mass,float InternalRadius,
                       float ExternalRadius,float Angle,
                       int Line)
{
   // Integer Impact Parameter Size (half of image)
   uint bmaxi=(uint)Size;

   float Trajectory[TRACKPOINTS];

   // Perform trajectory for each pixel

   float m,rs,ri,re,tho;
   int raie,q;

   m=Mass;
   rs=2.e0f*m;
   ri=InternalRadius;
   re=ExternalRadius;
   tho=Angle;
   q=-2;
   raie=Line;

   float bmx,db,b,h;
   uint nh;

   // Autosize for image
   bmx=1.25e0f*re;

   // Angular step of integration
   h=4.e0f*PI/(float)TRACKPOINTS;

   // Integer Impact Parameter ID
   for (int bi=0;bi<bmaxi;bi++)
   {
      // impact parameter
      b=(float)bi/(float)bmaxi*bmx;
      db=bmx/(2.e0f*(float)bmaxi);

      float up,vp,pp,us,vs,ps;

      up=0.e0f;
      vp=1.e0f;

      pp=0.e0f;
      nh=0;

      rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);

      // b versus us
      float bvus=fabs(b/us);
      float bvus0=bvus;
      Trajectory[nh]=bvus;

      do
      {
         nh++;
         pp=ps;
         up=us;
         vp=vs;
         rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
         bvus=fabs(b/us);
         Trajectory[nh]=bvus;

      } while ((bvus>=rs)&&(bvus<=bvus0));

      for (uint i=(uint)nh+1;i<TRACKPOINTS;i++) {
         Trajectory[i]=0.e0f;
      }

      int imx=(int)(16*bi);

      for (int i=0;i<imx;i++)
      {
         float zp=0.e0f,fp=0.e0f;
         float phi=2.e0f*PI/(float)imx*(float)i;
         float phd=atanp(cos(phi)*sin(tho),cos(tho));
         uint yi=(uint)((float)bi*sin(phi)+bmaxi);
         uint xi=(uint)((float)bi*cos(phi)+bmaxi);

         uint HalfLap=0,ExitOnImpact=0,ni;
         float php,nr,r;

         do
         {
            php=phd+(float)HalfLap*PI;
            nr=php/h;
            ni=(int)nr;

            if (ni<nh)
            {
               r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.e0f)+Trajectory[ni];
            }
            else
            {
               r=Trajectory[ni];
            }

            if ((r<=re)&&(r>=ri))
            {
               ExitOnImpact=1;
               impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
            }

            HalfLap++;

         } while ((HalfLap<=2)&&(ExitOnImpact==0));

         zImage[yi+2*bmaxi*xi]=zp;
         fImage[yi+2*bmaxi*xi]=fp;

      }

   }

   barrier(CLK_GLOBAL_MEM_FENCE);

}
"""


def KernelCodeCuda():
    BlobCUDA = """

#define PI (float)3.14159265359
#define nbr 256

#define EINSTEIN 0
#define NEWTON 1

#ifdef SETTRACKPOINTS
#define TRACKPOINTS SETTRACKPOINTS
#else
#define TRACKPOINTS
#endif
__device__ float nothing(float x)
{
  return(x);
}

__device__ float atanp(float x,float y)
{
  float angle;

  angle=atan2(y,x);

  if (angle<0.e0f)
    {
      angle+=(float)2.e0f*PI;
    }

  return(angle);
}

__device__ float f(float v)
{
  return(v);
}

#if PHYSICS == NEWTON
__device__ float g(float u,float m,float b)
{
  return (-u);
}
#else
__device__ float g(float u,float m,float b)
{
  return (3.e0f*m/b*pow(u,2)-u);
}
#endif

__device__ void calcul(float *us,float *vs,float up,float vp,
                       float h,float m,float b)
{
  float c0,c1,c2,c3,d0,d1,d2,d3;

  c0=h*f(vp);
  c1=h*f(vp+c0/2.);
  c2=h*f(vp+c1/2.);
  c3=h*f(vp+c2);
  d0=h*g(up,m,b);
  d1=h*g(up+d0/2.,m,b);
  d2=h*g(up+d1/2.,m,b);
  d3=h*g(up+d2,m,b);

  *us=up+(c0+2.*c1+2.*c2+c3)/6.;
  *vs=vp+(d0+2.*d1+2.*d2+d3)/6.;
}

__device__ void rungekutta(float *ps,float *us,float *vs,
                           float pp,float up,float vp,
                           float h,float m,float b)
{
  calcul(us,vs,up,vp,h,m,b);
  *ps=pp+h;
}

__device__ float decalage_spectral(float r,float b,float phi,
                                   float tho,float m)
{
  return (sqrt(1-3*m/r)/(1+sqrt(m/pow(r,3))*b*sin(tho)*sin(phi)));
}

__device__ float spectre(float rf,int q,float b,float db,
                         float h,float r,float m,float bss)
{
  float flx;

//  flx=exp(q*log(r/m))*pow(rf,4)*b*db*h;
  flx=exp(q*log(r/m)+4.*log(rf))*b*db*h;
  return(flx);
}

__device__ float spectre_cn(float rf32,float b32,float db32,
                            float h32,float r32,float m32,float bss32)
{

#define MYFLOAT float

  MYFLOAT rf=(MYFLOAT)(rf32);
  MYFLOAT b=(MYFLOAT)(b32);
  MYFLOAT db=(MYFLOAT)(db32);
  MYFLOAT h=(MYFLOAT)(h32);
  MYFLOAT r=(MYFLOAT)(r32);
  MYFLOAT m=(MYFLOAT)(m32);
  MYFLOAT bss=(MYFLOAT)(bss32);

  MYFLOAT flx;
  MYFLOAT nu_rec,nu_em,qu,temp_em,flux_int;
  int fi,posfreq;

#define planck 6.62e-34
#define k 1.38e-23
#define c2 9.e16
#define temp 3.e7
#define m_point 1.

#define lplanck (log(6.62)-34.*log(10.))
#define lk (log(1.38)-23.*log(10.))
#define lc2 (log(9.)+16.*log(10.))

  MYFLOAT v=1.-3./r;

  qu=1./sqrt((1.-3./r)*r)*(sqrt(r)-sqrt(6.)+sqrt(3.)/2.*log((sqrt(r)+sqrt(3.))/(sqrt(r)-sqrt(3.))* 0.17157287525380988 ));  // # noqa: #051

  temp_em=temp*sqrt(m)*exp(0.25*log(m_point)-0.75*log(r)-0.125*log(v)+0.25*log(fabs(qu)));

  flux_int=0.;
  flx=0.;

  for (fi=0;fi<nbr;fi++)
    {
      nu_em=bss*(MYFLOAT)fi/(MYFLOAT)nbr;
      nu_rec=nu_em*rf;
      posfreq=(int)(nu_rec*(MYFLOAT)nbr/bss);
      if ((posfreq>0)&&(posfreq<nbr))
        {
          // Initial version
          // flux_int=2.*planck/c2*pow(nu_em,3)/(exp(planck*nu_em/(k*temp_em))-1.);
          // Version with log used
          //flux_int=2.*exp(lplanck-lc2+3.*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.);
          // flux_int*=pow(rf,3)*b*db*h;
          //flux_int*=exp(3.*log(rf))*b*db*h;
          flux_int=2.*exp(lplanck-lc2+3.*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.)*exp(3.*log(rf))*b*db*h;

          flx+=flux_int;
        }
    }

  return((float)(flx));
}

__device__ void impact(float phi,float r,float b,float tho,float m,
                       float *zp,float *fp,
                       int q,float db,
                       float h,int raie)
{
  float flx,rf,bss;

  rf=decalage_spectral(r,b,phi,tho,m);

  if (raie==0)
    {
      bss=1.e19;
      flx=spectre_cn(rf,b,db,h,r,m,bss);
    }
  else
    {
      bss=2.;
      flx=spectre(rf,q,b,db,h,r,m,bss);
    }

  *zp=1./rf;
  *fp=flx;

}

__global__ void EachPixel(float *zImage,float *fImage,
                          float Mass,float InternalRadius,
                          float ExternalRadius,float Angle,
                          int Line)
{
   uint xi=(uint)(blockIdx.x*blockDim.x+threadIdx.x);
   uint yi=(uint)(blockIdx.y*blockDim.y+threadIdx.y);
   uint sizex=(uint)gridDim.x*blockDim.x;
   uint sizey=(uint)gridDim.y*blockDim.y;


   // Perform trajectory for each pixel, exit on hit

  float m,rs,ri,re,tho;
  int q,raie;

  m=Mass;
  rs=2.*m;
  ri=InternalRadius;
  re=ExternalRadius;
  tho=Angle;
  q=-2;
  raie=Line;

  float bmx,db,b,h;
  float rp0,rpp,rps;
  float phi,phd;
  int nh;
  float zp,fp;

  // Autosize for image
  bmx=1.25*re;
  b=0.;

  h=4.e0f*PI/(float)TRACKPOINTS;

  // set origin as center of image
  float x=(float)xi-(float)(sizex/2)+(float)5e-1f;
  float y=(float)yi-(float)(sizey/2)+(float)5e-1f;
  // angle extracted from cylindric symmetry
  phi=atanp(x,y);
  phd=atanp(cos(phi)*sin(tho),cos(tho));

  float up,vp,pp,us,vs,ps;

  // impact parameter
  b=sqrt(x*x+y*y)*(float)2.e0f/(float)sizex*bmx;
  // step of impact parameter;
//  db=bmx/(float)(sizex/2);
  db=bmx/(float)(sizex);

  up=0.;
  vp=1.;
  pp=0.;
  nh=0;

  rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);

  rps=fabs(b/us);
  rp0=rps;

  int ExitOnImpact=0;

  do
  {
     nh++;
     pp=ps;
     up=us;
     vp=vs;
     rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
     rpp=rps;
     rps=fabs(b/us);
     ExitOnImpact = ((fmod(pp,PI)<fmod(phd,PI))&&(fmod(ps,PI)>fmod(phd,PI)))&&(rps>ri)&&(rps<re)?1:0;

  } while ((rps>=rs)&&(rps<=rp0)&&(ExitOnImpact==0));

  if (ExitOnImpact==1) {
     impact(phi,rpp,b,tho,m,&zp,&fp,q,db,h,raie);
  }
  else
  {
     zp=0.e0f;
     fp=0.e0f;
  }

  __syncthreads();

  zImage[yi+sizex*xi]=(float)zp;
  fImage[yi+sizex*xi]=(float)fp;
}

__global__ void Pixel(float *zImage,float *fImage,
                      float *Trajectories,int *IdLast,
                      uint ImpactParameter,
                      float Mass,float InternalRadius,
                      float ExternalRadius,float Angle,
                      int Line)
{
   uint xi=(uint)(blockIdx.x*blockDim.x+threadIdx.x);
   uint yi=(uint)(blockIdx.y*blockDim.y+threadIdx.y);
   uint sizex=(uint)gridDim.x*blockDim.x;
   uint sizey=(uint)gridDim.y*blockDim.y;

  // Perform trajectory for each pixel

  float m,ri,re,tho;
  int q,raie;

  m=Mass;
  ri=InternalRadius;
  re=ExternalRadius;
  tho=Angle;
  q=-2;
  raie=Line;

  float bmx,db,b,h;
  float phi,phd,php,nr,r;
  float zp=0,fp=0;
  // Autosize for image, 25% greater than external radius
  bmx=1.25e0f*re;

  // Angular step of integration
  h=4.e0f*PI/(float)TRACKPOINTS;

  // Step of Impact Parameter
  db=bmx/(2.e0f*(float)ImpactParameter);

  // set origin as center of image
  float x=(float)xi-(float)(sizex/2)+(float)5e-1f;
  float y=(float)yi-(float)(sizey/2)+(float)5e-1f;
  // angle extracted from cylindric symmetry
  phi=atanp(x,y);
  phd=atanp(cos(phi)*sin(tho),cos(tho));

  // Real Impact Parameter
  b=sqrt(x*x+y*y)*bmx/(float)ImpactParameter;

  // Integer Impact Parameter
  uint bi=(uint)sqrt(x*x+y*y);

  int HalfLap=0,ExitOnImpact=0,ni;

  if (bi<ImpactParameter)
  {
    do
    {
      php=phd+(float)HalfLap*PI;
      nr=php/h;
      ni=(int)nr;

      if (ni<IdLast[bi])
      {
        r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni];
      }
      else
      {
        r=Trajectories[bi*TRACKPOINTS+ni];
      }

      if ((r<=re)&&(r>=ri))
      {
        ExitOnImpact=1;
        impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
      }

      HalfLap++;
    } while ((HalfLap<=2)&&(ExitOnImpact==0));

  }

  zImage[yi+sizex*xi]=zp;
  fImage[yi+sizex*xi]=fp;
}

__global__ void Circle(float *Trajectories,int *IdLast,
                       float *zImage,float *fImage,
                       float Mass,float InternalRadius,
                       float ExternalRadius,float Angle,
                       int Line)
{
   // Integer Impact Parameter ID
   int bi=blockIdx.x*blockDim.x+threadIdx.x;
   // Integer points on circle
   int i=blockIdx.y*blockDim.y+threadIdx.y;
   // Integer Impact Parameter Size (half of image)
   int bmaxi=gridDim.x*blockDim.x;
   // Integer Points on circle
   int imx=gridDim.y*blockDim.y;

   // Perform trajectory for each pixel

  float m,ri,re,tho;
  int q,raie;

  m=Mass;
  ri=InternalRadius;
  re=ExternalRadius;
  tho=Angle;
  raie=Line;

  float bmx,db,b,h;
  float phi,phd;
  float zp=0,fp=0;

  // Autosize for image
  bmx=1.25e0f*re;

  // Angular step of integration
  h=4.e0f*PI/(float)TRACKPOINTS;

  // impact parameter
  b=(float)bi/(float)bmaxi*bmx;
  db=bmx/(2.e0f*(float)bmaxi);

  phi=2.e0f*PI/(float)imx*(float)i;
  phd=atanp(cos(phi)*sin(tho),cos(tho));
  int yi=(int)((float)bi*sin(phi))+bmaxi;
  int xi=(int)((float)bi*cos(phi))+bmaxi;

  int HalfLap=0,ExitOnImpact=0,ni;
  float php,nr,r;

  do
  {
     php=phd+(float)HalfLap*PI;
     nr=php/h;
     ni=(int)nr;

     if (ni<IdLast[bi])
     {
        r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni];
     }
     else
     {
        r=Trajectories[bi*TRACKPOINTS+ni];
     }

     if ((r<=re)&&(r>=ri))
     {
        ExitOnImpact=1;
        impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
     }

     HalfLap++;
  } while ((HalfLap<=2)&&(ExitOnImpact==0));

  zImage[yi+2*bmaxi*xi]=zp;
  fImage[yi+2*bmaxi*xi]=fp;

}

__global__ void Trajectory(float *Trajectories,int *IdLast,
                           float Mass,float InternalRadius,
                           float ExternalRadius,float Angle,
                           int Line)
{
  // Integer Impact Parameter ID
  int bi=blockIdx.x*blockDim.x+threadIdx.x;
  // Integer Impact Parameter Size (half of image)
  int bmaxi=gridDim.x*blockDim.x;

  // Perform trajectory for each pixel

  float m,rs,re;

  m=Mass;
  rs=2.e0f*m;
  re=ExternalRadius;

  float bmx,b,h;
  int nh;

  // Autosize for image
  bmx=1.25e0f*re;

  // Angular step of integration
  h=4.e0f*PI/(float)TRACKPOINTS;

  // impact parameter
  b=(float)bi/(float)bmaxi*bmx;

  float up,vp,pp,us,vs,ps;

  up=0.e0f;
  vp=1.e0f;
  pp=0.e0f;
  nh=0;

  rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);

  // b versus us
  float bvus=fabs(b/us);
  float bvus0=bvus;
  Trajectories[bi*TRACKPOINTS+nh]=bvus;

  do
  {
     nh++;
     pp=ps;
     up=us;
     vp=vs;
     rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
     bvus=fabs(b/us);
     Trajectories[bi*TRACKPOINTS+nh]=bvus;

  } while ((bvus>=rs)&&(bvus<=bvus0));

  IdLast[bi]=nh;

}

__global__ void EachCircle(float *zImage,float *fImage,
                           float Mass,float InternalRadius,
                           float ExternalRadius,float Angle,
                           int Line)
{
  // Integer Impact Parameter ID
  int bi=blockIdx.x*blockDim.x+threadIdx.x;

  // Integer Impact Parameter Size (half of image)
  int bmaxi=gridDim.x*blockDim.x;

  float Trajectory[2048];

  // Perform trajectory for each pixel

  float m,rs,ri,re,tho;
  int raie,q;

  m=Mass;
  rs=2.*m;
  ri=InternalRadius;
  re=ExternalRadius;
  tho=Angle;
  q=-2;
  raie=Line;

  float bmx,db,b,h;
  int nh;

  // Autosize for image
  bmx=1.25e0f*re;

  // Angular step of integration
  h=4.e0f*PI/(float)TRACKPOINTS;

  // impact parameter
  b=(float)bi/(float)bmaxi*bmx;
  db=bmx/(2.e0f*(float)bmaxi);

  float up,vp,pp,us,vs,ps;

  up=0.;
  vp=1.;
  pp=0.;
  nh=0;

  rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);

  // b versus us
  float bvus=fabs(b/us);
  float bvus0=bvus;
  Trajectory[nh]=bvus;

  do
  {
     nh++;
     pp=ps;
     up=us;
     vp=vs;
     rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
     bvus=fabs(b/us);
     Trajectory[nh]=bvus;

  } while ((bvus>=rs)&&(bvus<=bvus0));

  int imx=(int)(16*bi);

  for (int i=0;i<imx;i++)
  {
     float zp=0,fp=0;
     float phi=2.*PI/(float)imx*(float)i;
     float phd=atanp(cos(phi)*sin(tho),cos(tho));
     uint yi=(uint)((float)bi*sin(phi)+bmaxi);
     uint xi=(uint)((float)bi*cos(phi)+bmaxi);

     int HalfLap=0,ExitOnImpact=0,ni;
     float php,nr,r;

     do
     {
        php=phd+(float)HalfLap*PI;
        nr=php/h;
        ni=(int)nr;

        if (ni<nh)
        {
           r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.)+Trajectory[ni];
        }
        else
        {
           r=Trajectory[ni];
        }

        if ((r<=re)&&(r>=ri))
        {
           ExitOnImpact=1;
           impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
        }

        HalfLap++;

     } while ((HalfLap<=2)&&(ExitOnImpact==0));

   __syncthreads();

   zImage[yi+2*bmaxi*xi]=zp;
   fImage[yi+2*bmaxi*xi]=fp;

  }

}

__global__ void Original(float *zImage,float *fImage,
                         uint Size,float Mass,float InternalRadius,
                         float ExternalRadius,float Angle,
                         int Line)
{
   // Integer Impact Parameter Size (half of image)
   uint bmaxi=(uint)Size;

   float Trajectory[TRACKPOINTS];

   // Perform trajectory for each pixel

   float m,rs,ri,re,tho;
   int raie,q;

   m=Mass;
   rs=2.e0f*m;
   ri=InternalRadius;
   re=ExternalRadius;
   tho=Angle;
   q=-2;
   raie=Line;

   float bmx,db,b,h;
   int nh;

   // Autosize for image
   bmx=1.25e0f*re;

   // Angular step of integration
   h=4.e0f*PI/(float)TRACKPOINTS;

   // Integer Impact Parameter ID
   for (int bi=0;bi<bmaxi;bi++)
   {
      // impact parameter
      b=(float)bi/(float)bmaxi*bmx;
      db=bmx/(2.e0f*(float)bmaxi);

      float up,vp,pp,us,vs,ps;

      up=0.;
      vp=1.;
      pp=0.;
      nh=0;

      rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);

      // b versus us
      float bvus=fabs(b/us);
      float bvus0=bvus;
      Trajectory[nh]=bvus;

      do
      {
         nh++;
         pp=ps;
         up=us;
         vp=vs;
         rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
         bvus=fabs(b/us);
         Trajectory[nh]=bvus;

      } while ((bvus>=rs)&&(bvus<=bvus0));

      for (uint i=(uint)nh+1;i<TRACKPOINTS;i++) {
         Trajectory[i]=0.e0f;
      }

      int imx=(int)(16*bi);

      for (int i=0;i<imx;i++)
      {
         float zp=0,fp=0;
         float phi=2.e0f*PI/(float)imx*(float)i;
         float phd=atanp(cos(phi)*sin(tho),cos(tho));
         uint yi=(uint)((float)bi*sin(phi)+bmaxi);
         uint xi=(uint)((float)bi*cos(phi)+bmaxi);

         int HalfLap=0,ExitOnImpact=0,ni;
         float php,nr,r;

         do
         {
            php=phd+(float)HalfLap*PI;
            nr=php/h;
            ni=(int)nr;

            if (ni<nh)
            {
               r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.)+Trajectory[ni];
            }
            else
            {
               r=Trajectory[ni];
            }

            if ((r<=re)&&(r>=ri))
            {
               ExitOnImpact=1;
               impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
            }

            HalfLap++;

         } while ((HalfLap<=2)&&(ExitOnImpact==0));

         zImage[yi+2*bmaxi*xi]=zp;
         fImage[yi+2*bmaxi*xi]=fp;

      }

   }

}
"""
    return BlobCUDA


# def ImageOutput(sigma,prefix,Colors):
#     import matplotlib.pyplot as plt
#     start_time=time.time()
#     if Colors == 'Red2Yellow':
#         plt.imsave("%s.png" % prefix, sigma, cmap='afmhot')
#     else:
#         plt.imsave("%s.png" % prefix, sigma, cmap='Greys_r')
#     save_time = time.time()-start_time
#     print("Save image as %s.png file" % prefix)
#     print("Save Time : %f" % save_time)


def ImageOutput(sigma, prefix, Colors):
    from PIL import Image

    Max = sigma.max()
    Min = sigma.min()
    # Normalize value as 8bits Integer
    SigmaInt = (255 * (sigma - Min) / (Max - Min)).astype("uint8")
    image = Image.fromarray(SigmaInt)
    image.save("%s.jpg" % prefix)


def BlackHoleCL(zImage, fImage, InputCL):
    Device = InputCL["Device"]
    Mass = InputCL["Mass"]
    InternalRadius = InputCL["InternalRadius"]
    ExternalRadius = InputCL["ExternalRadius"]
    Angle = InputCL["Angle"]
    Method = InputCL["Method"]
    TrackPoints = InputCL["TrackPoints"]
    Physics = InputCL["Physics"]
    NoImage = InputCL["NoImage"]
    TrackSave = InputCL["TrackSave"]

    PhysicsList = DictionariesAPI()

    if InputCL["BlackBody"]:
        # Spectrum is Black Body one
        Line = 0
    else:
        # Spectrum is Monochromatic Line one
        Line = 1

    Trajectories = numpy.zeros(
        (int(InputCL["Size"] / 2), InputCL["TrackPoints"]), dtype=numpy.float32
    )
    IdLast = numpy.zeros(int(InputCL["Size"] / 2), dtype=numpy.int32)

    # Je detecte un peripherique GPU dans la liste des peripheriques
    Id = 0
    HasXPU = False
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            if Id == Device:
                PF4XPU = platform.name
                XPU = device
                print("CPU/GPU selected: ", device.name.lstrip())
                HasXPU = True
            Id += 1

    if not HasXPU:
        print("No XPU #%i found in all of %i devices, sorry..." % (Device, Id - 1))
        sys.exit()

    ctx = cl.Context([XPU])
    queue = cl.CommandQueue(
        ctx, properties=cl.command_queue_properties.PROFILING_ENABLE
    )

    BuildOptions = "-DPHYSICS=%i -DSETTRACKPOINTS=%i " % (
        PhysicsList[Physics],
        InputCL["TrackPoints"],
    )

    print("My Platform is ", PF4XPU)

    if (
        "Intel" in PF4XPU
        or "Experimental" in PF4XPU
        or "Clover" in PF4XPU
        or "Portable" in PF4XPU
    ):
        print("No extra options for Intel and Clover!")
    else:
        BuildOptions = BuildOptions + " -cl-mad-enable"

    BlackHoleCL = cl.Program(ctx, BlobOpenCL).build(options=BuildOptions)

    # Je recupere les flag possibles pour les buffers
    mf = cl.mem_flags

    if Method == "TrajectoPixel" or Method == "TrajectoCircle":
        TrajectoriesCL = cl.Buffer(
            ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=Trajectories
        )
        IdLastCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=IdLast)

    zImageCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=zImage)
    fImageCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=fImage)

    start_time = time.time()

    if Method == "EachPixel":
        CLLaunch = BlackHoleCL.EachPixel(
            queue,
            (zImage.shape[0], zImage.shape[1]),
            None,
            zImageCL,
            fImageCL,
            numpy.float32(Mass),
            numpy.float32(InternalRadius),
            numpy.float32(ExternalRadius),
            numpy.float32(Angle),
            numpy.int32(Line),
        )
        CLLaunch.wait()
    elif Method == "Original":
        CLLaunch = BlackHoleCL.Original(
            queue,
            (1,),
            None,
            zImageCL,
            fImageCL,
            numpy.uint32(zImage.shape[0] / 2),
            numpy.float32(Mass),
            numpy.float32(InternalRadius),
            numpy.float32(ExternalRadius),
            numpy.float32(Angle),
            numpy.int32(Line),
        )
        CLLaunch.wait()
    elif Method == "EachCircle":
        CLLaunch = BlackHoleCL.EachCircle(
            queue,
            (int(zImage.shape[0] / 2),),
            None,
            zImageCL,
            fImageCL,
            numpy.float32(Mass),
            numpy.float32(InternalRadius),
            numpy.float32(ExternalRadius),
            numpy.float32(Angle),
            numpy.int32(Line),
        )
        CLLaunch.wait()
    elif Method == "TrajectoCircle":
        CLLaunch = BlackHoleCL.Trajectory(
            queue,
            (Trajectories.shape[0],),
            None,
            TrajectoriesCL,
            IdLastCL,
            numpy.float32(Mass),
            numpy.float32(InternalRadius),
            numpy.float32(ExternalRadius),
            numpy.float32(Angle),
            numpy.int32(Line),
        )

        CLLaunch = BlackHoleCL.Circle(
            queue,
            (Trajectories.shape[0], int(zImage.shape[0] * 4)),
            None,
            TrajectoriesCL,
            IdLastCL,
            zImageCL,
            fImageCL,
            numpy.float32(Mass),
            numpy.float32(InternalRadius),
            numpy.float32(ExternalRadius),
            numpy.float32(Angle),
            numpy.int32(Line),
        )
        CLLaunch.wait()
    else:
        CLLaunch = BlackHoleCL.Trajectory(
            queue,
            (Trajectories.shape[0],),
            None,
            TrajectoriesCL,
            IdLastCL,
            numpy.float32(Mass),
            numpy.float32(InternalRadius),
            numpy.float32(ExternalRadius),
            numpy.float32(Angle),
            numpy.int32(Line),
        )

        CLLaunch = BlackHoleCL.Pixel(
            queue,
            (zImage.shape[0], zImage.shape[1]),
            None,
            zImageCL,
            fImageCL,
            TrajectoriesCL,
            IdLastCL,
            numpy.uint32(Trajectories.shape[0]),
            numpy.float32(Mass),
            numpy.float32(InternalRadius),
            numpy.float32(ExternalRadius),
            numpy.float32(Angle),
            numpy.int32(Line),
        )
        CLLaunch.wait()

    compute = time.time() - start_time

    cl.enqueue_copy(queue, zImage, zImageCL).wait()
    cl.enqueue_copy(queue, fImage, fImageCL).wait()
    if Method == "TrajectoPixel" or Method == "TrajectoCircle":
        cl.enqueue_copy(queue, Trajectories, TrajectoriesCL).wait()
        cl.enqueue_copy(queue, IdLast, IdLastCL).wait()
    elapsed = time.time() - start_time
    print("\nCompute Time : %f" % compute)
    print("Elapsed Time : %f\n" % elapsed)

    zMaxPosition = numpy.where(zImage[:, :] == zImage.max())
    fMaxPosition = numpy.where(fImage[:, :] == fImage.max())
    print(
        "Z max @(%f,%f) : %f"
        % (
            (
                1.0 * zMaxPosition[1][0] / zImage.shape[1] - 0.5,
                1.0 * zMaxPosition[0][0] / zImage.shape[0] - 0.5,
                zImage.max(),
            )
        )
    )
    print(
        "Flux max @(%f,%f) : %f"
        % (
            (
                1.0 * fMaxPosition[1][0] / fImage.shape[1] - 0.5,
                1.0 * fMaxPosition[0][0] / fImage.shape[0] - 0.5,
                fImage.max(),
            )
        )
    )
    zImageCL.release()
    fImageCL.release()

    if Method == "TrajectoPixel" or Method == "TrajectoCircle":
        if not NoImage:
            AngleStep = 4 * numpy.pi / TrackPoints
            Angles = numpy.arange(0.0, 4 * numpy.pi, AngleStep)
            Angles.shape = (1, TrackPoints)

            if TrackSave:
                # numpy.savetxt("TrouNoirTrajectories_%s.csv" % ImageInfo,
                #               numpy.transpose(numpy.concatenate((Angles,Trajectories),axis=0)),
                #               delimiter=' ', fmt='%.2e')
                numpy.savetxt(
                    "TrouNoirTrajectories.csv",
                    numpy.transpose(numpy.concatenate((Angles, Trajectories),
                        axis=0)),
                    delimiter=" ",
                    fmt="%.2e",
                )

        TrajectoriesCL.release()
        IdLastCL.release()

    return elapsed


def BlackHoleCUDA(zImage, fImage, InputCL):
    Device = InputCL["Device"]
    Mass = InputCL["Mass"]
    InternalRadius = InputCL["InternalRadius"]
    ExternalRadius = InputCL["ExternalRadius"]
    Angle = InputCL["Angle"]
    Method = InputCL["Method"]
    TrackPoints = InputCL["TrackPoints"]
    Physics = InputCL["Physics"]
    Threads = InputCL["Threads"]

    PhysicsList = DictionariesAPI()

    if InputCL["BlackBody"]:
        # Spectrum is Black Body one
        Line = 0
    else:
        # Spectrum is Monochromatic Line one
        Line = 1

    Trajectories = numpy.zeros(
        (int(InputCL["Size"] / 2), InputCL["TrackPoints"]), dtype=numpy.float32
    )
    IdLast = numpy.zeros(int(InputCL["Size"] / 2), dtype=numpy.int32)

    try:
        # For PyCUDA import
        import pycuda.driver as cuda
        from pycuda.compiler import SourceModule

        cuda.init()
        for Id in range(cuda.Device.count()):
            if Id == Device:
                XPU = cuda.Device(Id)
                print("GPU selected %s" % XPU.name())
        print

    except ImportError:
        print("Platform does not seem to support CUDA")

    Context = XPU.make_context()

    try:
        mod = SourceModule(
            KernelCodeCuda(),
            options=[
                "--compiler-options",
                "-DPHYSICS=%i -DSETTRACKPOINTS=%i"
                % (PhysicsList[Physics], TrackPoints),
            ],
        )
        print("Compilation seems to be OK")
    except Exception:
        print("Compilation seems to break")

    EachPixelCU = mod.get_function("EachPixel")
    OriginalCU = mod.get_function("Original")
    EachCircleCU = mod.get_function("EachCircle")
    TrajectoryCU = mod.get_function("Trajectory")
    PixelCU = mod.get_function("Pixel")
    CircleCU = mod.get_function("Circle")

    TrajectoriesCU = cuda.mem_alloc(Trajectories.size * Trajectories.dtype.itemsize)
    cuda.memcpy_htod(TrajectoriesCU, Trajectories)
    zImageCU = cuda.mem_alloc(zImage.size * zImage.dtype.itemsize)
    cuda.memcpy_htod(zImageCU, zImage)
    fImageCU = cuda.mem_alloc(fImage.size * fImage.dtype.itemsize)
    cuda.memcpy_htod(zImageCU, fImage)
    IdLastCU = cuda.mem_alloc(IdLast.size * IdLast.dtype.itemsize)
    cuda.memcpy_htod(IdLastCU, IdLast)

    start_time = time.time()

    if Method == "EachPixel":
        EachPixelCU(
            zImageCU,
            fImageCU,
            numpy.float32(Mass),
            numpy.float32(InternalRadius),
            numpy.float32(ExternalRadius),
            numpy.float32(Angle),
            numpy.int32(Line),
            grid=(int(zImage.shape[0] / Threads), int(zImage.shape[1] / Threads)),
            block=(Threads, Threads, 1),
        )
    elif Method == "EachCircle":
        EachCircleCU(
            zImageCU,
            fImageCU,
            numpy.float32(Mass),
            numpy.float32(InternalRadius),
            numpy.float32(ExternalRadius),
            numpy.float32(Angle),
            numpy.int32(Line),
            grid=(int(zImage.shape[0] / Threads / 2), 1),
            block=(Threads, 1, 1),
        )
    elif Method == "Original":
        OriginalCU(
            zImageCU,
            fImageCU,
            numpy.uint32(zImage.shape[0] / 2),
            numpy.float32(Mass),
            numpy.float32(InternalRadius),
            numpy.float32(ExternalRadius),
            numpy.float32(Angle),
            numpy.int32(Line),
            grid=(1, 1),
            block=(1, 1, 1),
        )
    elif Method == "TrajectoCircle":
        TrajectoryCU(
            TrajectoriesCU,
            IdLastCU,
            numpy.float32(Mass),
            numpy.float32(InternalRadius),
            numpy.float32(ExternalRadius),
            numpy.float32(Angle),
            numpy.int32(Line),
            grid=(int(Trajectories.shape[0] / Threads), 1),
            block=(Threads, 1, 1),
        )

        CircleCU(
            TrajectoriesCU,
            IdLastCU,
            zImageCU,
            fImageCU,
            numpy.float32(Mass),
            numpy.float32(InternalRadius),
            numpy.float32(ExternalRadius),
            numpy.float32(Angle),
            numpy.int32(Line),
            grid=(
                int(Trajectories.shape[0] / Threads),
                int(zImage.shape[0] * 4 / Threads),
            ),
            block=(Threads, Threads, 1),
        )
    else:
        # Default method: TrajectoPixel
        TrajectoryCU(
            TrajectoriesCU,
            IdLastCU,
            numpy.float32(Mass),
            numpy.float32(InternalRadius),
            numpy.float32(ExternalRadius),
            numpy.float32(Angle),
            numpy.int32(Line),
            grid=(int(Trajectories.shape[0] / Threads), 1),
            block=(Threads, 1, 1),
        )

        PixelCU(
            zImageCU,
            fImageCU,
            TrajectoriesCU,
            IdLastCU,
            numpy.uint32(Trajectories.shape[0]),
            numpy.float32(Mass),
            numpy.float32(InternalRadius),
            numpy.float32(ExternalRadius),
            numpy.float32(Angle),
            numpy.int32(Line),
            grid=(int(zImage.shape[0] / Threads), int(zImage.shape[1] / Threads), 1),
            block=(Threads, Threads, 1),
        )

    Context.synchronize()

    compute = time.time() - start_time

    cuda.memcpy_dtoh(zImage, zImageCU)
    cuda.memcpy_dtoh(fImage, fImageCU)
    if Method == "TrajectoPixel" or Method == "TrajectoCircle":
        cuda.memcpy_dtoh(Trajectories, TrajectoriesCU)
    elapsed = time.time() - start_time
    print("\nCompute Time : %f" % compute)
    print("Elapsed Time : %f\n" % elapsed)

    zMaxPosition = numpy.where(zImage[:, :] == zImage.max())
    fMaxPosition = numpy.where(fImage[:, :] == fImage.max())
    print(
        "Z max @(%f,%f) : %f"
        % (
            (
                1.0 * zMaxPosition[1][0] / zImage.shape[1] - 0.5,
                1.0 * zMaxPosition[0][0] / zImage.shape[0] - 0.5,
                zImage.max(),
            )
        )
    )
    print(
        "Flux max @(%f,%f) : %f"
        % (
            (
                1.0 * fMaxPosition[1][0] / fImage.shape[1] - 0.5,
                1.0 * fMaxPosition[0][0] / fImage.shape[0] - 0.5,
                fImage.max(),
            )
        )
    )

    Context.pop()

    Context.detach()

    if Method == "TrajectoPixel" or Method == "TrajectoCircle":
        if not NoImage:
            AngleStep = 4 * numpy.pi / TrackPoints
            Angles = numpy.arange(0.0, 4 * numpy.pi, AngleStep)
            Angles.shape = (1, TrackPoints)

            # numpy.savetxt("TrouNoirTrajectories_%s.csv" % ImageInfo,
            #               numpy.transpose(numpy.concatenate((Angles,Trajectories),axis=0)),
            #               delimiter=' ', fmt='%.2e')
            numpy.savetxt(
                "TrouNoirTrajectories.csv",
                numpy.transpose(numpy.concatenate((Angles, Trajectories), axis=0)),
                delimiter=" ",
                fmt="%.2e",
            )

    return elapsed


if __name__ == "__main__":

    # Default device: first one!
    Device = 0
    # Default implementation: OpenCL, most versatile!
    GpuStyle = "OpenCL"
    Mass = 1.0
    # Internal Radius 3 times de Schwarzschild Radius
    InternalRadius = 6.0 * Mass
    #
    ExternalRadius = 12.0
    #
    # Angle with normal to disc 10 degrees
    Angle = numpy.pi / 180.0 * (90.0 - 10.0)
    # Radiation of disc : BlackBody or Monochromatic
    BlackBody = False
    # Size of image
    Size = 1024
    # Variable Type
    VariableType = "FP32"
    # ?
    q = -2
    # Method of resolution
    Method = "TrajectoPixel"
    # Colors for output image
    Colors = "Greyscale"
    # Physics
    Physics = "Einstein"
    # No output as image
    NoImage = False
    # Threads in CUDA
    Threads = 32
    # Trackpoints of trajectories
    TrackPoints = 2048
    # Tracksave of trajectories
    TrackSave = False

    HowToUse = "%s -h [Help] -b [BlackBodyEmission] -j [TrackSave] -n [NoImage] -p <Einstein/Newton> -s <SizeInPixels> -m <Mass> -i <DiscInternalRadius> -x <DiscExternalRadius> -a <AngleAboveDisc> -d <DeviceId> -c <Greyscale/Red2Yellow> -g <CUDA/OpenCL> -o <EachPixel/TrajectoCircle/TrajectoPixel/EachCircle/Original> -t <ThreadsInCuda> -v <FP32/FP64> -k <TrackPoints>"  # noqa: E501

    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            "hbnjs:m:i:x:a:d:g:v:o:t:c:p:k:",
            [
                "tracksave",
                "blackbody",
                "noimage",
                "camera",
                "size=",
                "mass=",
                "internal=",
                "external=",
                "angle=",
                "device=",
                "gpustyle=",
                "variabletype=",
                "method=",
                "threads=",
                "colors=",
                "physics=",
                "trackpoints=",
            ],
        )
    except getopt.GetoptError:
        print(HowToUse % sys.argv[0])
        sys.exit(2)

    # List of Devices
    Devices = []
    Alu = {}

    for opt, arg in opts:
        if opt == "-h":
            print(HowToUse % sys.argv[0])

            print("\nInformations about devices detected under OpenCL API:")
            # For PyOpenCL import
            try:
                Id = 0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        # deviceType=cl.device_type.to_string(device.type)
                        deviceType = "xPU"
                        print(
                            "Device #%i from %s of type %s : %s"
                            % (
                                Id,
                                platform.vendor.lstrip(),
                                deviceType,
                                device.name.lstrip(),
                            )
                        )
                        Id = Id + 1

            except Exception:
                print("Your platform does not seem to support OpenCL")

            print("\nInformations about devices detected under CUDA API:")
            # For PyCUDA import
            try:
                import pycuda.driver as cuda

                cuda.init()
                for Id in range(cuda.Device.count()):
                    device = cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id, device.name()))
                print
            except Exception:
                print("Your platform does not seem to support CUDA")

            sys.exit()

        elif opt in ("-d", "--device"):
            #            Devices.append(int(arg))
            Device = int(arg)
        elif opt in ("-g", "--gpustyle"):
            GpuStyle = arg
        elif opt in ("-v", "--variabletype"):
            VariableType = arg
        elif opt in ("-s", "--size"):
            Size = int(arg)
        elif opt in ("-k", "--trackpoints"):
            TrackPoints = int(arg)
        elif opt in ("-m", "--mass"):
            Mass = float(arg)
        elif opt in ("-i", "--internal"):
            InternalRadius = float(arg)
        elif opt in ("-e", "--external"):
            ExternalRadius = float(arg)
        elif opt in ("-a", "--angle"):
            Angle = numpy.pi / 180.0 * (90.0 - float(arg))
        elif opt in ("-b", "--blackbody"):
            BlackBody = True
        elif opt in ("-j", "--tracksave"):
            TrackSave = True
        elif opt in ("-n", "--noimage"):
            NoImage = True
        elif opt in ("-o", "--method"):
            Method = arg
        elif opt in ("-t", "--threads"):
            Threads = int(arg)
        elif opt in ("-c", "--colors"):
            Colors = arg
        elif opt in ("-p", "--physics"):
            Physics = arg

    print("Device Identification selected : %s" % Device)
    print("GpuStyle used : %s" % GpuStyle)
    print("VariableType : %s" % VariableType)
    print("Size : %i" % Size)
    print("Mass : %f" % Mass)
    print("Internal Radius : %f" % InternalRadius)
    print("External Radius : %f" % ExternalRadius)
    print("Angle with normal of (in radians) : %f" % Angle)
    print("Black Body Disc Emission (monochromatic instead) : %s" % BlackBody)
    print("Method of resolution : %s" % Method)
    print("Colors for output images : %s" % Colors)
    print("Physics used for Trajectories : %s" % Physics)
    print("Trackpoints of Trajectories : %i" % TrackPoints)
    print("Tracksave of Trajectories : %i" % TrackSave)

    if GpuStyle == "CUDA":
        print("\nSelection of CUDA device")
        try:
            # For PyCUDA import
            import pycuda.driver as cuda

            cuda.init()
            for Id in range(cuda.Device.count()):
                device = cuda.Device(Id)
                print("Device #%i of type GPU : %s" % (Id, device.name()))
                if Id in Devices:
                    Alu[Id] = "GPU"

        except ImportError:
            print("Platform does not seem to support CUDA")

    if GpuStyle == "OpenCL":
        print("\nSelection of OpenCL device")
        try:
            # For PyOpenCL import
            import pyopencl as cl

            Id = 0
            for platform in cl.get_platforms():
                for device in platform.get_devices():
                    # deviceType=cl.device_type.to_string(device.type)
                    deviceType = "xPU"
                    print(
                        "Device #%i from %s of type %s : %s"
                        % (
                            Id,
                            platform.vendor.lstrip().rstrip(),
                            deviceType,
                            device.name.lstrip().rstrip(),
                        )
                    )

                    if Id in Devices:
                        # Set the Alu as detected Device Type
                        Alu[Id] = deviceType
                    Id = Id + 1
        except ImportError:
            print("Platform does not seem to support OpenCL")

    zImage = numpy.zeros((Size, Size), dtype=numpy.float32)
    fImage = numpy.zeros((Size, Size), dtype=numpy.float32)

    InputCL = {}
    InputCL["Device"] = Device
    InputCL["GpuStyle"] = GpuStyle
    InputCL["VariableType"] = VariableType
    InputCL["Size"] = Size
    InputCL["Mass"] = Mass
    InputCL["InternalRadius"] = InternalRadius
    InputCL["ExternalRadius"] = ExternalRadius
    InputCL["Angle"] = Angle
    InputCL["BlackBody"] = BlackBody
    InputCL["Method"] = Method
    InputCL["TrackPoints"] = TrackPoints
    InputCL["Physics"] = Physics
    InputCL["Threads"] = Threads
    InputCL["NoImage"] = NoImage
    InputCL["TrackSave"] = TrackSave

    if GpuStyle == "OpenCL":
        duration = BlackHoleCL(zImage, fImage, InputCL)
    else:
        duration = BlackHoleCUDA(zImage, fImage, InputCL)

    Hostname = gethostname()
    Date = time.strftime("%Y%m%d_%H%M%S")
    ImageInfo = "%s_Device%i_%s_%s" % (Method, Device, Hostname, Date)

    if not NoImage:
        ImageOutput(zImage, "TrouNoirZ_%s" % ImageInfo, Colors)
        ImageOutput(fImage, "TrouNoirF_%s" % ImageInfo, Colors)