#!/usr/bin/env python3
#
# TrouNoir model using PyOpenCL or PyCUDA
#
# CC BY-NC-SA 2019 : <emmanuel.quemener@ens-lyon.fr>
#
# Part of matrix programs from: https://forge.cbp.ens-lyon.fr/svn/bench4gpu/
#
# Thanks to Andreas Klockner for PyOpenCL and PyCUDA:
# http://mathema.tician.de/software/pyopencl
#
# Original code programmed in Fortran 77 in mars 1994
# for Practical Work of Numerical Simulation
# DEA (old Master2) in astrophysics and spatial techniques in Meudon
# by Herve Aussel & Emmanuel Quemener
#
# Conversion in C done by Emmanuel Quemener in august 1997
# GPUfication in OpenCL under Python in july 2019
# GPUfication in CUDA under Python in august 2019
#
# Thanks to :
#
# - Herve Aussel for his part of code of black body spectrum
# - Didier Pelat for his help to perform this work
# - Jean-Pierre Luminet for his article published in 1979
# - Numerical Recipies for Runge Kutta recipies
# - Luc Blanchet for his disponibility about my questions in General Relativity
# - Pierre Lena for his passion about science and vulgarisation

# If crash on OpenCL Intel implementation, add following options and force
#export PYOPENCL_COMPILER_OUTPUT=1
#export CL_CONFIG_USE_VECTORIZER=True
#export CL_CONFIG_CPU_VECTORIZER_MODE=16

import pyopencl as cl
import numpy
import time,string
from numpy.random import randint as nprnd
import sys
import getopt
from socket import gethostname

def DictionariesAPI():
    PhysicsList={'Einstein':0,'Newton':1}
    return(PhysicsList)

#
# Blank space below to simplify debugging on OpenCL code
#






















































BlobOpenCL= """

#define PI (float)3.14159265359e0f
#define nbr 256

#define EINSTEIN 0
#define NEWTON 1

#ifdef SETTRACKPOINTS
#define TRACKPOINTS SETTRACKPOINTS
#else
#define TRACKPOINTS 2048
#endif

float atanp(float x,float y)
{
  float angle;

  angle=atan2(y,x);

  if (angle<0.e0f)
    {
      angle+=(float)2.e0f*PI;
    }

  return angle;
}

float f(float v)
{
  return v;
}

#if PHYSICS == NEWTON
float g(float u,float m,float b)
{
  return (-u);
}
#else
float g(float u,float m,float b)
{
  return (3.e0f*m/b*pow(u,2)-u);
}
#endif

void calcul(float *us,float *vs,float up,float vp,
            float h,float m,float b)
{
  float c0,c1,c2,c3,d0,d1,d2,d3;

  c0=h*f(vp);
  c1=h*f(vp+c0/2.e0f);
  c2=h*f(vp+c1/2.e0f);
  c3=h*f(vp+c2);
  d0=h*g(up,m,b);
  d1=h*g(up+d0/2.e0f,m,b);
  d2=h*g(up+d1/2.e0f,m,b);
  d3=h*g(up+d2,m,b);

  *us=up+(c0+2.e0f*c1+2.e0f*c2+c3)/6.e0f;
  *vs=vp+(d0+2.e0f*d1+2.e0f*d2+d3)/6.e0f;
}

void rungekutta(float *ps,float *us,float *vs,
                float pp,float up,float vp,
                float h,float m,float b)
{
  calcul(us,vs,up,vp,h,m,b);
  *ps=pp+h;
}

float decalage_spectral(float r,float b,float phi,
                        float tho,float m)
{
  return (sqrt(1-3*m/r)/(1+sqrt(m/pow(r,3))*b*sin(tho)*sin(phi)));
}

float spectre(float rf,int q,float b,float db,
              float h,float r,float m,float bss)
{
  float flx;

//  flx=exp(q*log(r/m))*pow(rf,4)*b*db*h;
  flx=exp(q*log(r/m)+4.e0f*log(rf))*b*db*h;
  return(flx);
}

float spectre_cn(float rf32,float b32,float db32,
                 float h32,float r32,float m32,float bss32)
{

#define MYFLOAT float

  MYFLOAT rf=(MYFLOAT)(rf32);
  MYFLOAT b=(MYFLOAT)(b32);
  MYFLOAT db=(MYFLOAT)(db32);
  MYFLOAT h=(MYFLOAT)(h32);
  MYFLOAT r=(MYFLOAT)(r32);
  MYFLOAT m=(MYFLOAT)(m32);
  MYFLOAT bss=(MYFLOAT)(bss32);

  MYFLOAT flx;
  MYFLOAT nu_rec,nu_em,qu,temp_em,flux_int;
  int fi,posfreq;

#define planck 6.62e-34f
#define k 1.38e-23f
#define c2 9.e16f
#define temp 3.e7f
#define m_point 1.e0f

#define lplanck (log(6.62e0f)-34.e0f*log(10.e0f))
#define lk (log(1.38e0f)-23.e0f*log(10.e0f))
#define lc2 (log(9.e0f)+16.e0f*log(10.e0f))

  MYFLOAT v=1.e0f-3.e0f/r;

  qu=1.e0f/sqrt((1.e0f-3.e0f/r)*r)*(sqrt(r)-sqrt(6.e0f)+sqrt(3.e0f)/2.e0f*log((sqrt(r)+sqrt(3.e0f))/(sqrt(r)-sqrt(3.e0f))* 0.17157287525380988e0f ));

  temp_em=temp*sqrt(m)*exp(0.25e0f*log(m_point)-0.75e0f*log(r)-0.125e0f*log(v)+0.25e0f*log(fabs(qu)));

  flux_int=0.e0f;
  flx=0.e0f;

  for (fi=0;fi<nbr;fi++)
    {
      nu_em=bss*(MYFLOAT)fi/(MYFLOAT)nbr;
      nu_rec=nu_em*rf;
      posfreq=(int)(nu_rec*(MYFLOAT)nbr/bss);
      if ((posfreq>0)&&(posfreq<nbr))
        {
          // Initial version
          // flux_int=2.*planck/c2*pow(nu_em,3)/(exp(planck*nu_em/(k*temp_em))-1.);
          // Version with log used
          //flux_int=2.*exp(lplanck-lc2+3.*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.);
          // flux_int*=pow(rf,3)*b*db*h;
          //flux_int*=exp(3.e0f*log(rf))*b*db*h;
          flux_int=2.e0f*exp(lplanck-lc2+3.e0f*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.e0f)*exp(3.e0f*log(rf))*b*db*h;

          flx+=flux_int;
        }
    }

  return((float)(flx));
}

void impact(float phi,float r,float b,float tho,float m,
            float *zp,float *fp,
            int q,float db,
            float h,int raie)
{
  float flx,rf,bss;

  rf=decalage_spectral(r,b,phi,tho,m);

  if (raie==0)
    {
      bss=1.e19f;
      flx=spectre_cn(rf,b,db,h,r,m,bss);
    }
  else
    {
      bss=2.e0f;
      flx=spectre(rf,q,b,db,h,r,m,bss);
    }

  *zp=1.e0f/rf;
  *fp=flx;

}

__kernel void EachPixel(__global float *zImage,__global float *fImage,
                        float Mass,float InternalRadius,
                        float ExternalRadius,float Angle,
                        int Line)
{
   uint xi=(uint)get_global_id(0);
   uint yi=(uint)get_global_id(1);
   uint sizex=(uint)get_global_size(0);
   uint sizey=(uint)get_global_size(1);

   // Perform trajectory for each pixel, exit on hit

  float m,rs,ri,re,tho;
  int q,raie;

  m=Mass;
  rs=2.e0f*m;
  ri=InternalRadius;
  re=ExternalRadius;
  tho=Angle;
  q=-2;
  raie=Line;

  float bmx,db,b,h;
  float rp0,rps;
  float phi,phd;
  uint nh=0;
  float zp=0.e0f,fp=0.e0f;

  // Autosize for image
  bmx=1.25e0f*re;

  h=4.e0f*PI/(float)TRACKPOINTS;

  // set origin as center of image
  float x=(float)xi-(float)(sizex/2)+(float)5.e-1f;
  float y=(float)yi-(float)(sizey/2)+(float)5.e-1f;
  // angle extracted from cylindric symmetry
  phi=atanp(x,y);
  phd=atanp(cos(phi)*sin(tho),cos(tho));


  float up,vp,pp,us,vs,ps;

  // impact parameter
  b=sqrt(x*x+y*y)*(float)2.e0f/(float)sizex*bmx;
  // step of impact parameter;
  db=bmx/(float)(sizex);

  up=0.e0f;
  vp=1.e0f;
  pp=0.e0f;

  rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);

  rps=fabs(b/us);
  rp0=rps;

  int ExitOnImpact=0;

  do
  {
     nh++;
     pp=ps;
     up=us;
     vp=vs;
     rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
     rps=fabs(b/us);
     ExitOnImpact = ((fmod(pp,PI)<fmod(phd,PI))&&(fmod(ps,PI)>fmod(phd,PI)))&&(rps>=ri)&&(rps<=re)?1:0;       

  } while ((rps>=rs)&&(rps<=rp0)&&(ExitOnImpact==0)&&(nh<TRACKPOINTS));


  if (ExitOnImpact==1) {
     impact(phi,rps,b,tho,m,&zp,&fp,q,db,h,raie);
  }
  else
  {
     zp=0.e0f;
     fp=0.e0f;
  }

  barrier(CLK_GLOBAL_MEM_FENCE);

  zImage[yi+sizex*xi]=(float)zp;
  fImage[yi+sizex*xi]=(float)fp;
}

__kernel void Pixel(__global float *zImage,__global float *fImage,
                    __global float *Trajectories,__global int *IdLast,
                    uint ImpactParameter,
                    float Mass,float InternalRadius,
                    float ExternalRadius,float Angle,
                    int Line)
{
   uint xi=(uint)get_global_id(0);
   uint yi=(uint)get_global_id(1);
   uint sizex=(uint)get_global_size(0);
   uint sizey=(uint)get_global_size(1);

   // Perform trajectory for each pixel

  float m,ri,re,tho;
  int q,raie;

  m=Mass;
  ri=InternalRadius;
  re=ExternalRadius;
  tho=Angle;
  q=-2;
  raie=Line;

  float bmx,db,b,h;
  float phi,phd,php,nr,r;
  float zp=0.e0f,fp=0.e0f;

  // Autosize for image, 25% greater than external radius
  bmx=1.25e0f*re;

  // Angular step of integration
  h=4.e0f*PI/(float)TRACKPOINTS;

  // Step of Impact Parameter
  db=bmx/(2.e0f*(float)ImpactParameter);

  // set origin as center of image
  float x=(float)xi-(float)(sizex/2)+(float)5.e-1f;
  float y=(float)yi-(float)(sizey/2)+(float)5.e-1f;

  // angle extracted from cylindric symmetry
  phi=atanp(x,y);
  phd=atanp(cos(phi)*sin(tho),cos(tho));

  // Real Impact Parameter
  b=sqrt(x*x+y*y)*bmx/(float)ImpactParameter;

  // Integer Impact Parameter
  uint bi=(uint)sqrt(x*x+y*y);

  int HalfLap=0,ExitOnImpact=0,ni;

  if (bi<ImpactParameter)
  {
    do
    {
      php=phd+(float)HalfLap*PI;
      nr=php/h;
      ni=(int)nr;

      if (ni<IdLast[bi])
      {
        r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni];
      }
      else
      {
        r=Trajectories[bi*TRACKPOINTS+ni];
      }

      if ((r<=re)&&(r>=ri))
      {
        ExitOnImpact=1;
        impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
      }

      HalfLap++;
    } while ((HalfLap<=2)&&(ExitOnImpact==0));

  }

  barrier(CLK_GLOBAL_MEM_FENCE);

  zImage[yi+sizex*xi]=zp;
  fImage[yi+sizex*xi]=fp;
}

__kernel void Circle(__global float *Trajectories,__global int *IdLast,
                     __global float *zImage,__global float *fImage,
                     float Mass,float InternalRadius,
                     float ExternalRadius,float Angle,
                     int Line)
{
   // Integer Impact Parameter ID
   int bi=get_global_id(0);
   // Integer points on circle
   int i=get_global_id(1);
   // Integer Impact Parameter Size (half of image)
   int bmaxi=get_global_size(0);
   // Integer Points on circle
   int imx=get_global_size(1);

   // Perform trajectory for each pixel

  float m,ri,re,tho;
  int q,raie;

  m=Mass;
  ri=InternalRadius;
  re=ExternalRadius;
  tho=Angle;
  raie=Line;

  float bmx,db,b,h;
  float phi,phd;
  float zp=0.e0f,fp=0.e0f;

  // Autosize for image
  bmx=1.25e0f*re;

  // Angular step of integration
  h=4.e0f*PI/(float)TRACKPOINTS;

  // impact parameter
  b=(float)bi/(float)bmaxi*bmx;
  db=bmx/(2.e0f*(float)bmaxi);

  phi=2.e0f*PI/(float)imx*(float)i;
  phd=atanp(cos(phi)*sin(tho),cos(tho));
  int yi=(int)((float)bi*sin(phi))+bmaxi;
  int xi=(int)((float)bi*cos(phi))+bmaxi;

  int HalfLap=0,ExitOnImpact=0,ni;
  float php,nr,r;

  do
  {
     php=phd+(float)HalfLap*PI;
     nr=php/h;
     ni=(int)nr;

     if (ni<IdLast[bi])
     {
        r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni];
     }
     else
     {
        r=Trajectories[bi*TRACKPOINTS+ni];
     }

     if ((r<=re)&&(r>=ri))
     {
        ExitOnImpact=1;
        impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
     }

     HalfLap++;
  } while ((HalfLap<=2)&&(ExitOnImpact==0));

  zImage[yi+2*bmaxi*xi]=zp;
  fImage[yi+2*bmaxi*xi]=fp;

  barrier(CLK_GLOBAL_MEM_FENCE);

}

__kernel void Trajectory(__global float *Trajectories,__global int *IdLast,
                         float Mass,float InternalRadius,
                         float ExternalRadius,float Angle,
                         int Line)
{
  // Integer Impact Parameter ID
  int bi=get_global_id(0);
  // Integer Impact Parameter Size (half of image)
  int bmaxi=get_global_size(0);

  // Perform trajectory for each pixel

  float m,rs,re;

  m=Mass;
  rs=2.e0f*m;
  re=ExternalRadius;

  float bmx,b,h;
  int nh;

  // Autosize for image
  bmx=1.25e0f*re;

  // Angular step of integration
  h=4.e0f*PI/(float)TRACKPOINTS;

  // impact parameter
  b=(float)bi/(float)bmaxi*bmx;

  float up,vp,pp,us,vs,ps;

  up=0.e0f;
  vp=1.e0f;

  pp=0.e0f;
  nh=0;

  rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);

  // b versus us
  float bvus=fabs(b/us);
  float bvus0=bvus;
  Trajectories[bi*TRACKPOINTS+nh]=bvus;

  do
  {
     nh++;
     pp=ps;
     up=us;
     vp=vs;
     rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
     bvus=fabs(b/us);
     Trajectories[bi*TRACKPOINTS+nh]=bvus;

  } while ((bvus>=rs)&&(bvus<=bvus0));

  IdLast[bi]=nh;

  barrier(CLK_GLOBAL_MEM_FENCE);

}

__kernel void EachCircle(__global float *zImage,__global float *fImage,
                         float Mass,float InternalRadius,
                         float ExternalRadius,float Angle,
                         int Line)
{
   // Integer Impact Parameter ID
   uint bi=(uint)get_global_id(0);
   // Integer Impact Parameter Size (half of image)
   uint bmaxi=(uint)get_global_size(0);

  private float Trajectory[TRACKPOINTS];

  float m,rs,ri,re,tho;
  int raie,q;

  m=Mass;
  rs=2.e0f*m;
  ri=InternalRadius;
  re=ExternalRadius;
  tho=Angle;
  q=-2;
  raie=Line;

  float bmx,db,b,h;
  uint nh;


  // Autosize for image
  bmx=1.25e0f*re;

  // Angular step of integration
  h=4.e0f*PI/(float)TRACKPOINTS;

  // impact parameter
  b=(float)bi/(float)bmaxi*bmx;
  db=bmx/(2.e0f*(float)bmaxi);

  float up,vp,pp,us,vs,ps;

  up=0.e0f;
  vp=1.e0f;

  pp=0.e0f;
  nh=0;

  rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);

  // b versus us
  float bvus=fabs(b/us);
  float bvus0=bvus;
  Trajectory[nh]=bvus;

  do
  {
     nh++;
     pp=ps;
     up=us;
     vp=vs;
     rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
     bvus=(float)fabs(b/us);
     Trajectory[nh]=bvus;

  } while ((bvus>=rs)&&(bvus<=bvus0));


  for (uint i=(uint)nh+1;i<TRACKPOINTS;i++) {
     Trajectory[i]=0.e0f;
  }


  uint imx=(uint)(16*bi);

  for (uint i=0;i<imx;i++)
  {
     float zp=0.e0f,fp=0.e0f;
     float phi=2.e0f*PI/(float)imx*(float)i;
     float phd=atanp(cos(phi)*sin(tho),cos(tho));
     uint yi=(uint)((float)bi*sin(phi)+bmaxi);
     uint xi=(uint)((float)bi*cos(phi)+bmaxi);

     uint HalfLap=0,ExitOnImpact=0,ni;
     float php,nr,r;

     do
     {
        php=phd+(float)HalfLap*PI;
        nr=php/h;
        ni=(int)nr;

        if (ni<nh)
        {
           r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.e0f)+Trajectory[ni];
        }
        else
        {
           r=Trajectory[ni];
        }

        if ((r<=re)&&(r>=ri))
        {
           ExitOnImpact=1;
           impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
        }

        HalfLap++;

     } while ((HalfLap<=2)&&(ExitOnImpact==0));

     zImage[yi+2*bmaxi*xi]=zp;
     fImage[yi+2*bmaxi*xi]=fp;

  }

  barrier(CLK_GLOBAL_MEM_FENCE);

}

__kernel void Original(__global float *zImage,__global float *fImage,
                       uint Size,float Mass,float InternalRadius,
                       float ExternalRadius,float Angle,
                       int Line)
{
   // Integer Impact Parameter Size (half of image)
   uint bmaxi=(uint)Size;

   float Trajectory[TRACKPOINTS];

   // Perform trajectory for each pixel

   float m,rs,ri,re,tho;
   int raie,q;

   m=Mass;
   rs=2.e0f*m;
   ri=InternalRadius;
   re=ExternalRadius;
   tho=Angle;
   q=-2;
   raie=Line;

   float bmx,db,b,h;
   uint nh;

   // Autosize for image
   bmx=1.25e0f*re;

   // Angular step of integration
   h=4.e0f*PI/(float)TRACKPOINTS;

   // Integer Impact Parameter ID
   for (int bi=0;bi<bmaxi;bi++)
   {
      // impact parameter
      b=(float)bi/(float)bmaxi*bmx;
      db=bmx/(2.e0f*(float)bmaxi);

      float up,vp,pp,us,vs,ps;

      up=0.e0f;
      vp=1.e0f;

      pp=0.e0f;
      nh=0;

      rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);

      // b versus us
      float bvus=fabs(b/us);
      float bvus0=bvus;
      Trajectory[nh]=bvus;

      do
      {
         nh++;
         pp=ps;
         up=us;
         vp=vs;
         rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
         bvus=fabs(b/us);
         Trajectory[nh]=bvus;

      } while ((bvus>=rs)&&(bvus<=bvus0));

      for (uint i=(uint)nh+1;i<TRACKPOINTS;i++) {
         Trajectory[i]=0.e0f;
      }

      int imx=(int)(16*bi);

      for (int i=0;i<imx;i++)
      {
         float zp=0.e0f,fp=0.e0f;
         float phi=2.e0f*PI/(float)imx*(float)i;
         float phd=atanp(cos(phi)*sin(tho),cos(tho));
         uint yi=(uint)((float)bi*sin(phi)+bmaxi);
         uint xi=(uint)((float)bi*cos(phi)+bmaxi);

         uint HalfLap=0,ExitOnImpact=0,ni;
         float php,nr,r;

         do
         {
            php=phd+(float)HalfLap*PI;
            nr=php/h;
            ni=(int)nr;

            if (ni<nh)
            {
               r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.e0f)+Trajectory[ni];
            }
            else
            {
               r=Trajectory[ni];
            }

            if ((r<=re)&&(r>=ri))
            {
               ExitOnImpact=1;
               impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
            }

            HalfLap++;

         } while ((HalfLap<=2)&&(ExitOnImpact==0));

         zImage[yi+2*bmaxi*xi]=zp;
         fImage[yi+2*bmaxi*xi]=fp;

      }

   }

   barrier(CLK_GLOBAL_MEM_FENCE);

}
"""








































































def KernelCodeCuda():
    BlobCUDA= """

#define PI (float)3.14159265359
#define nbr 256

#define EINSTEIN 0
#define NEWTON 1

#ifdef SETTRACKPOINTS
#define TRACKPOINTS SETTRACKPOINTS
#else
#define TRACKPOINTS
#endif
__device__ float nothing(float x)
{
  return(x);
}

__device__ float atanp(float x,float y)
{
  float angle;

  angle=atan2(y,x);

  if (angle<0.e0f)
    {
      angle+=(float)2.e0f*PI;
    }

  return(angle);
}

__device__ float f(float v)
{
  return(v);
}

#if PHYSICS == NEWTON
__device__ float g(float u,float m,float b)
{
  return (-u);
}
#else
__device__ float g(float u,float m,float b)
{
  return (3.e0f*m/b*pow(u,2)-u);
}
#endif

__device__ void calcul(float *us,float *vs,float up,float vp,
                       float h,float m,float b)
{
  float c0,c1,c2,c3,d0,d1,d2,d3;

  c0=h*f(vp);
  c1=h*f(vp+c0/2.);
  c2=h*f(vp+c1/2.);
  c3=h*f(vp+c2);
  d0=h*g(up,m,b);
  d1=h*g(up+d0/2.,m,b);
  d2=h*g(up+d1/2.,m,b);
  d3=h*g(up+d2,m,b);

  *us=up+(c0+2.*c1+2.*c2+c3)/6.;
  *vs=vp+(d0+2.*d1+2.*d2+d3)/6.;
}

__device__ void rungekutta(float *ps,float *us,float *vs,
                           float pp,float up,float vp,
                           float h,float m,float b)
{
  calcul(us,vs,up,vp,h,m,b);
  *ps=pp+h;
}

__device__ float decalage_spectral(float r,float b,float phi,
                                   float tho,float m)
{
  return (sqrt(1-3*m/r)/(1+sqrt(m/pow(r,3))*b*sin(tho)*sin(phi)));
}

__device__ float spectre(float rf,int q,float b,float db,
                         float h,float r,float m,float bss)
{
  float flx;

//  flx=exp(q*log(r/m))*pow(rf,4)*b*db*h;
  flx=exp(q*log(r/m)+4.*log(rf))*b*db*h;
  return(flx);
}

__device__ float spectre_cn(float rf32,float b32,float db32,
                            float h32,float r32,float m32,float bss32)
{

#define MYFLOAT float

  MYFLOAT rf=(MYFLOAT)(rf32);
  MYFLOAT b=(MYFLOAT)(b32);
  MYFLOAT db=(MYFLOAT)(db32);
  MYFLOAT h=(MYFLOAT)(h32);
  MYFLOAT r=(MYFLOAT)(r32);
  MYFLOAT m=(MYFLOAT)(m32);
  MYFLOAT bss=(MYFLOAT)(bss32);

  MYFLOAT flx;
  MYFLOAT nu_rec,nu_em,qu,temp_em,flux_int;
  int fi,posfreq;

#define planck 6.62e-34
#define k 1.38e-23
#define c2 9.e16
#define temp 3.e7
#define m_point 1.

#define lplanck (log(6.62)-34.*log(10.))
#define lk (log(1.38)-23.*log(10.))
#define lc2 (log(9.)+16.*log(10.))

  MYFLOAT v=1.-3./r;

  qu=1./sqrt((1.-3./r)*r)*(sqrt(r)-sqrt(6.)+sqrt(3.)/2.*log((sqrt(r)+sqrt(3.))/(sqrt(r)-sqrt(3.))* 0.17157287525380988 ));

  temp_em=temp*sqrt(m)*exp(0.25*log(m_point)-0.75*log(r)-0.125*log(v)+0.25*log(fabs(qu)));

  flux_int=0.;
  flx=0.;

  for (fi=0;fi<nbr;fi++)
    {
      nu_em=bss*(MYFLOAT)fi/(MYFLOAT)nbr;
      nu_rec=nu_em*rf;
      posfreq=(int)(nu_rec*(MYFLOAT)nbr/bss);
      if ((posfreq>0)&&(posfreq<nbr))
        {
          // Initial version
          // flux_int=2.*planck/c2*pow(nu_em,3)/(exp(planck*nu_em/(k*temp_em))-1.);
          // Version with log used
          //flux_int=2.*exp(lplanck-lc2+3.*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.);
          // flux_int*=pow(rf,3)*b*db*h;
          //flux_int*=exp(3.*log(rf))*b*db*h;
          flux_int=2.*exp(lplanck-lc2+3.*log(nu_em))/(exp(exp(lplanck-lk+log(nu_em/temp_em)))-1.)*exp(3.*log(rf))*b*db*h;

          flx+=flux_int;
        }
    }

  return((float)(flx));
}

__device__ void impact(float phi,float r,float b,float tho,float m,
                       float *zp,float *fp,
                       int q,float db,
                       float h,int raie)
{
  float flx,rf,bss;

  rf=decalage_spectral(r,b,phi,tho,m);

  if (raie==0)
    {
      bss=1.e19;
      flx=spectre_cn(rf,b,db,h,r,m,bss);
    }
  else
    {
      bss=2.;
      flx=spectre(rf,q,b,db,h,r,m,bss);
    }

  *zp=1./rf;
  *fp=flx;

}

__global__ void EachPixel(float *zImage,float *fImage,
                          float Mass,float InternalRadius,
                          float ExternalRadius,float Angle,
                          int Line)
{
   uint xi=(uint)(blockIdx.x*blockDim.x+threadIdx.x);
   uint yi=(uint)(blockIdx.y*blockDim.y+threadIdx.y);
   uint sizex=(uint)gridDim.x*blockDim.x;
   uint sizey=(uint)gridDim.y*blockDim.y;


   // Perform trajectory for each pixel, exit on hit

  float m,rs,ri,re,tho;
  int q,raie;

  m=Mass;
  rs=2.*m;
  ri=InternalRadius;
  re=ExternalRadius;
  tho=Angle;
  q=-2;
  raie=Line;

  float bmx,db,b,h;
  float rp0,rpp,rps;
  float phi,phd;
  int nh;
  float zp,fp;

  // Autosize for image
  bmx=1.25*re;
  b=0.;

  h=4.e0f*PI/(float)TRACKPOINTS;

  // set origin as center of image
  float x=(float)xi-(float)(sizex/2)+(float)5e-1f;
  float y=(float)yi-(float)(sizey/2)+(float)5e-1f;
  // angle extracted from cylindric symmetry
  phi=atanp(x,y);
  phd=atanp(cos(phi)*sin(tho),cos(tho));

  float up,vp,pp,us,vs,ps;

  // impact parameter
  b=sqrt(x*x+y*y)*(float)2.e0f/(float)sizex*bmx;
  // step of impact parameter;
//  db=bmx/(float)(sizex/2);
  db=bmx/(float)(sizex);

  up=0.;
  vp=1.;
  pp=0.;
  nh=0;

  rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);

  rps=fabs(b/us);
  rp0=rps;

  int ExitOnImpact=0;

  do
  {
     nh++;
     pp=ps;
     up=us;
     vp=vs;
     rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
     rpp=rps;
     rps=fabs(b/us);
     ExitOnImpact = ((fmod(pp,PI)<fmod(phd,PI))&&(fmod(ps,PI)>fmod(phd,PI)))&&(rps>ri)&&(rps<re)?1:0;          

  } while ((rps>=rs)&&(rps<=rp0)&&(ExitOnImpact==0));

  if (ExitOnImpact==1) {
     impact(phi,rpp,b,tho,m,&zp,&fp,q,db,h,raie);
  }
  else
  {
     zp=0.e0f;
     fp=0.e0f;
  }

  __syncthreads();

  zImage[yi+sizex*xi]=(float)zp;
  fImage[yi+sizex*xi]=(float)fp;
}

__global__ void Pixel(float *zImage,float *fImage,
                      float *Trajectories,int *IdLast,
                      uint ImpactParameter,
                      float Mass,float InternalRadius,
                      float ExternalRadius,float Angle,
                      int Line)
{
   uint xi=(uint)(blockIdx.x*blockDim.x+threadIdx.x);
   uint yi=(uint)(blockIdx.y*blockDim.y+threadIdx.y);
   uint sizex=(uint)gridDim.x*blockDim.x;
   uint sizey=(uint)gridDim.y*blockDim.y;

  // Perform trajectory for each pixel

  float m,ri,re,tho;
  int q,raie;

  m=Mass;
  ri=InternalRadius;
  re=ExternalRadius;
  tho=Angle;
  q=-2;
  raie=Line;

  float bmx,db,b,h;
  float phi,phd,php,nr,r;
  float zp=0,fp=0;
  // Autosize for image, 25% greater than external radius
  bmx=1.25e0f*re;

  // Angular step of integration
  h=4.e0f*PI/(float)TRACKPOINTS;

  // Step of Impact Parameter
  db=bmx/(2.e0f*(float)ImpactParameter);

  // set origin as center of image
  float x=(float)xi-(float)(sizex/2)+(float)5e-1f;
  float y=(float)yi-(float)(sizey/2)+(float)5e-1f;
  // angle extracted from cylindric symmetry
  phi=atanp(x,y);
  phd=atanp(cos(phi)*sin(tho),cos(tho));

  // Real Impact Parameter
  b=sqrt(x*x+y*y)*bmx/(float)ImpactParameter;

  // Integer Impact Parameter
  uint bi=(uint)sqrt(x*x+y*y);

  int HalfLap=0,ExitOnImpact=0,ni;

  if (bi<ImpactParameter)
  {
    do
    {
      php=phd+(float)HalfLap*PI;
      nr=php/h;
      ni=(int)nr;

      if (ni<IdLast[bi])
      {
        r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni];
      }
      else
      {
        r=Trajectories[bi*TRACKPOINTS+ni];
      }

      if ((r<=re)&&(r>=ri))
      {
        ExitOnImpact=1;
        impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
      }

      HalfLap++;
    } while ((HalfLap<=2)&&(ExitOnImpact==0));

  }

  zImage[yi+sizex*xi]=zp;
  fImage[yi+sizex*xi]=fp;
}

__global__ void Circle(float *Trajectories,int *IdLast,
                       float *zImage,float *fImage,
                       float Mass,float InternalRadius,
                       float ExternalRadius,float Angle,
                       int Line)
{
   // Integer Impact Parameter ID
   int bi=blockIdx.x*blockDim.x+threadIdx.x;
   // Integer points on circle
   int i=blockIdx.y*blockDim.y+threadIdx.y;
   // Integer Impact Parameter Size (half of image)
   int bmaxi=gridDim.x*blockDim.x;
   // Integer Points on circle
   int imx=gridDim.y*blockDim.y;

   // Perform trajectory for each pixel

  float m,ri,re,tho;
  int q,raie;

  m=Mass;
  ri=InternalRadius;
  re=ExternalRadius;
  tho=Angle;
  raie=Line;

  float bmx,db,b,h;
  float phi,phd;
  float zp=0,fp=0;

  // Autosize for image
  bmx=1.25e0f*re;

  // Angular step of integration
  h=4.e0f*PI/(float)TRACKPOINTS;

  // impact parameter
  b=(float)bi/(float)bmaxi*bmx;
  db=bmx/(2.e0f*(float)bmaxi);

  phi=2.e0f*PI/(float)imx*(float)i;
  phd=atanp(cos(phi)*sin(tho),cos(tho));
  int yi=(int)((float)bi*sin(phi))+bmaxi;
  int xi=(int)((float)bi*cos(phi))+bmaxi;

  int HalfLap=0,ExitOnImpact=0,ni;
  float php,nr,r;

  do
  {
     php=phd+(float)HalfLap*PI;
     nr=php/h;
     ni=(int)nr;

     if (ni<IdLast[bi])
     {
        r=(Trajectories[bi*TRACKPOINTS+ni+1]-Trajectories[bi*TRACKPOINTS+ni])*(nr-ni*1.e0f)+Trajectories[bi*TRACKPOINTS+ni];
     }
     else
     {
        r=Trajectories[bi*TRACKPOINTS+ni];
     }

     if ((r<=re)&&(r>=ri))
     {
        ExitOnImpact=1;
        impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
     }

     HalfLap++;
  } while ((HalfLap<=2)&&(ExitOnImpact==0));

  zImage[yi+2*bmaxi*xi]=zp;
  fImage[yi+2*bmaxi*xi]=fp;

}

__global__ void Trajectory(float *Trajectories,int *IdLast,
                           float Mass,float InternalRadius,
                           float ExternalRadius,float Angle,
                           int Line)
{
  // Integer Impact Parameter ID
  int bi=blockIdx.x*blockDim.x+threadIdx.x;
  // Integer Impact Parameter Size (half of image)
  int bmaxi=gridDim.x*blockDim.x;

  // Perform trajectory for each pixel

  float m,rs,re;

  m=Mass;
  rs=2.e0f*m;
  re=ExternalRadius;

  float bmx,b,h;
  int nh;

  // Autosize for image
  bmx=1.25e0f*re;

  // Angular step of integration
  h=4.e0f*PI/(float)TRACKPOINTS;

  // impact parameter
  b=(float)bi/(float)bmaxi*bmx;

  float up,vp,pp,us,vs,ps;

  up=0.e0f;
  vp=1.e0f;
  pp=0.e0f;
  nh=0;

  rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);

  // b versus us
  float bvus=fabs(b/us);
  float bvus0=bvus;
  Trajectories[bi*TRACKPOINTS+nh]=bvus;

  do
  {
     nh++;
     pp=ps;
     up=us;
     vp=vs;
     rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
     bvus=fabs(b/us);
     Trajectories[bi*TRACKPOINTS+nh]=bvus;

  } while ((bvus>=rs)&&(bvus<=bvus0));

  IdLast[bi]=nh;

}

__global__ void EachCircle(float *zImage,float *fImage,
                           float Mass,float InternalRadius,
                           float ExternalRadius,float Angle,
                           int Line)
{
  // Integer Impact Parameter ID
  int bi=blockIdx.x*blockDim.x+threadIdx.x;

  // Integer Impact Parameter Size (half of image)
  int bmaxi=gridDim.x*blockDim.x;

  float Trajectory[2048];

  // Perform trajectory for each pixel

  float m,rs,ri,re,tho;
  int raie,q;

  m=Mass;
  rs=2.*m;
  ri=InternalRadius;
  re=ExternalRadius;
  tho=Angle;
  q=-2;
  raie=Line;

  float bmx,db,b,h;
  int nh;

  // Autosize for image
  bmx=1.25e0f*re;

  // Angular step of integration
  h=4.e0f*PI/(float)TRACKPOINTS;

  // impact parameter
  b=(float)bi/(float)bmaxi*bmx;
  db=bmx/(2.e0f*(float)bmaxi);

  float up,vp,pp,us,vs,ps;

  up=0.;
  vp=1.;
  pp=0.;
  nh=0;

  rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);

  // b versus us
  float bvus=fabs(b/us);
  float bvus0=bvus;
  Trajectory[nh]=bvus;

  do
  {
     nh++;
     pp=ps;
     up=us;
     vp=vs;
     rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
     bvus=fabs(b/us);
     Trajectory[nh]=bvus;

  } while ((bvus>=rs)&&(bvus<=bvus0));

  int imx=(int)(16*bi);

  for (int i=0;i<imx;i++)
  {
     float zp=0,fp=0;
     float phi=2.*PI/(float)imx*(float)i;
     float phd=atanp(cos(phi)*sin(tho),cos(tho));
     uint yi=(uint)((float)bi*sin(phi)+bmaxi);
     uint xi=(uint)((float)bi*cos(phi)+bmaxi);

     int HalfLap=0,ExitOnImpact=0,ni;
     float php,nr,r;

     do
     {
        php=phd+(float)HalfLap*PI;
        nr=php/h;
        ni=(int)nr;

        if (ni<nh)
        {
           r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.)+Trajectory[ni];
        }
        else
        {
           r=Trajectory[ni];
        }

        if ((r<=re)&&(r>=ri))
        {
           ExitOnImpact=1;
           impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
        }

        HalfLap++;

     } while ((HalfLap<=2)&&(ExitOnImpact==0));

   __syncthreads();

   zImage[yi+2*bmaxi*xi]=zp;
   fImage[yi+2*bmaxi*xi]=fp;

  }

}

__global__ void Original(float *zImage,float *fImage,
                         uint Size,float Mass,float InternalRadius,
                         float ExternalRadius,float Angle,
                         int Line)
{
   // Integer Impact Parameter Size (half of image)
   uint bmaxi=(uint)Size;

   float Trajectory[TRACKPOINTS];

   // Perform trajectory for each pixel

   float m,rs,ri,re,tho;
   int raie,q;

   m=Mass;
   rs=2.e0f*m;
   ri=InternalRadius;
   re=ExternalRadius;
   tho=Angle;
   q=-2;
   raie=Line;

   float bmx,db,b,h;
   int nh;

   // Autosize for image
   bmx=1.25e0f*re;

   // Angular step of integration
   h=4.e0f*PI/(float)TRACKPOINTS;

   // Integer Impact Parameter ID
   for (int bi=0;bi<bmaxi;bi++)
   {
      // impact parameter
      b=(float)bi/(float)bmaxi*bmx;
      db=bmx/(2.e0f*(float)bmaxi);

      float up,vp,pp,us,vs,ps;

      up=0.;
      vp=1.;
      pp=0.;
      nh=0;

      rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);

      // b versus us
      float bvus=fabs(b/us);
      float bvus0=bvus;
      Trajectory[nh]=bvus;

      do
      {
         nh++;
         pp=ps;
         up=us;
         vp=vs;
         rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
         bvus=fabs(b/us);
         Trajectory[nh]=bvus;

      } while ((bvus>=rs)&&(bvus<=bvus0));

      for (uint i=(uint)nh+1;i<TRACKPOINTS;i++) {
         Trajectory[i]=0.e0f;
      }

      int imx=(int)(16*bi);

      for (int i=0;i<imx;i++)
      {
         float zp=0,fp=0;
         float phi=2.e0f*PI/(float)imx*(float)i;
         float phd=atanp(cos(phi)*sin(tho),cos(tho));
         uint yi=(uint)((float)bi*sin(phi)+bmaxi);
         uint xi=(uint)((float)bi*cos(phi)+bmaxi);

         int HalfLap=0,ExitOnImpact=0,ni;
         float php,nr,r;

         do
         {
            php=phd+(float)HalfLap*PI;
            nr=php/h;
            ni=(int)nr;

            if (ni<nh)
            {
               r=(Trajectory[ni+1]-Trajectory[ni])*(nr-ni*1.)+Trajectory[ni];
            }
            else
            {
               r=Trajectory[ni];
            }

            if ((r<=re)&&(r>=ri))
            {
               ExitOnImpact=1;
               impact(phi,r,b,tho,m,&zp,&fp,q,db,h,raie);
            }

            HalfLap++;

         } while ((HalfLap<=2)&&(ExitOnImpact==0));

         zImage[yi+2*bmaxi*xi]=zp;
         fImage[yi+2*bmaxi*xi]=fp;

      }

   }

}
"""
    return(BlobCUDA)

# def ImageOutput(sigma,prefix,Colors):
#     import matplotlib.pyplot as plt
#     start_time=time.time()
#     if Colors == 'Red2Yellow':
#         plt.imsave("%s.png" % prefix, sigma, cmap='afmhot')
#     else:
#         plt.imsave("%s.png" % prefix, sigma, cmap='Greys_r')
#     save_time = time.time()-start_time
#     print("Save image as %s.png file" % prefix)
#     print("Save Time : %f" % save_time)

def ImageOutput(sigma,prefix,Colors):
    from PIL import Image
    Max=sigma.max()
    Min=sigma.min()
    # Normalize value as 8bits Integer
    SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8')
    image = Image.fromarray(SigmaInt)
    image.save("%s.jpg" % prefix)
    
def BlackHoleCL(zImage,fImage,InputCL):

    kernel_params = {}

    Device=InputCL['Device']
    GpuStyle=InputCL['GpuStyle']
    VariableType=InputCL['VariableType']
    Size=InputCL['Size']
    Mass=InputCL['Mass']
    InternalRadius=InputCL['InternalRadius']
    ExternalRadius=InputCL['ExternalRadius']
    Angle=InputCL['Angle']
    Method=InputCL['Method']
    TrackPoints=InputCL['TrackPoints']
    Physics=InputCL['Physics']
    NoImage=InputCL['NoImage']
    TrackSave=InputCL['TrackSave']

    PhysicsList=DictionariesAPI()
    
    if InputCL['BlackBody']:
        # Spectrum is Black Body one
        Line=0
    else:
        # Spectrum is Monochromatic Line one
        Line=1

    Trajectories=numpy.zeros((int(InputCL['Size']/2),InputCL['TrackPoints']),dtype=numpy.float32)
    IdLast=numpy.zeros(int(InputCL['Size']/2),dtype=numpy.int32)

    # Je detecte un peripherique GPU dans la liste des peripheriques
    Id=0
    HasXPU=False
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            if Id==Device:
                PF4XPU=platform.name
                XPU=device
                print("CPU/GPU selected: ",device.name.lstrip())
                HasXPU=True
            Id+=1

    if HasXPU==False:
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
        sys.exit()

    ctx = cl.Context([XPU])
    queue = cl.CommandQueue(ctx,
			    properties=cl.command_queue_properties.PROFILING_ENABLE)

    BuildOptions="-DPHYSICS=%i -DSETTRACKPOINTS=%i " % (PhysicsList[Physics],InputCL['TrackPoints'])

    print('My Platform is ',PF4XPU)
    
    if 'Intel' in PF4XPU or 'Experimental' in PF4XPU or 'Clover' in PF4XPU or 'Portable' in PF4XPU :
        print('No extra options for Intel and Clover!')
    else:
        BuildOptions = BuildOptions+" -cl-mad-enable"

    BlackHoleCL = cl.Program(ctx,BlobOpenCL).build(options = BuildOptions)
    
    # Je recupere les flag possibles pour les buffers
    mf = cl.mem_flags

    if Method=='TrajectoPixel' or Method=='TrajectoCircle':
        TrajectoriesCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=Trajectories) 
        IdLastCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=IdLast)

    zImageCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=zImage)
    fImageCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=fImage)
    
    start_time=time.time()

    if Method=='EachPixel':
        CLLaunch=BlackHoleCL.EachPixel(queue,(zImage.shape[0],zImage.shape[1]),
                                       None,zImageCL,fImageCL,
                                       numpy.float32(Mass),
                                       numpy.float32(InternalRadius),
                                       numpy.float32(ExternalRadius),
                                       numpy.float32(Angle),
                                       numpy.int32(Line))
        CLLaunch.wait()
    elif Method=='Original':
        CLLaunch=BlackHoleCL.Original(queue,(1,),
                                      None,zImageCL,fImageCL,
                                      numpy.uint32(zImage.shape[0]/2),
                                      numpy.float32(Mass),
                                      numpy.float32(InternalRadius),
                                      numpy.float32(ExternalRadius),
                                      numpy.float32(Angle),
                                      numpy.int32(Line))
        CLLaunch.wait()
    elif Method=='EachCircle':
        CLLaunch=BlackHoleCL.EachCircle(queue,(int(zImage.shape[0]/2),),
                                        None,zImageCL,fImageCL,
                                        numpy.float32(Mass),
                                        numpy.float32(InternalRadius),
                                        numpy.float32(ExternalRadius),
                                        numpy.float32(Angle),
                                        numpy.int32(Line))
        CLLaunch.wait()
    elif Method=='TrajectoCircle':
        CLLaunch=BlackHoleCL.Trajectory(queue,(Trajectories.shape[0],),
                                        None,TrajectoriesCL,IdLastCL,
                                        numpy.float32(Mass),
                                        numpy.float32(InternalRadius),
                                        numpy.float32(ExternalRadius),
                                        numpy.float32(Angle),
                                        numpy.int32(Line))
        
        CLLaunch=BlackHoleCL.Circle(queue,(Trajectories.shape[0],
                                           int(zImage.shape[0]*4)),None,
                                    TrajectoriesCL,IdLastCL,
                                    zImageCL,fImageCL,
                                    numpy.float32(Mass),
                                    numpy.float32(InternalRadius),
                                    numpy.float32(ExternalRadius),
                                    numpy.float32(Angle),
                                    numpy.int32(Line))
        CLLaunch.wait()
    else:
        CLLaunch=BlackHoleCL.Trajectory(queue,(Trajectories.shape[0],),
                                        None,TrajectoriesCL,IdLastCL,
                                        numpy.float32(Mass),
                                        numpy.float32(InternalRadius),
                                        numpy.float32(ExternalRadius),
                                        numpy.float32(Angle),
                                        numpy.int32(Line))
        
        CLLaunch=BlackHoleCL.Pixel(queue,(zImage.shape[0],zImage.shape[1]),None,
                                   zImageCL,fImageCL,TrajectoriesCL,IdLastCL,
                                   numpy.uint32(Trajectories.shape[0]),
                                   numpy.float32(Mass),
                                   numpy.float32(InternalRadius),
                                   numpy.float32(ExternalRadius),
                                   numpy.float32(Angle),
                                   numpy.int32(Line))
        CLLaunch.wait()
    
    compute = time.time()-start_time
    
    cl.enqueue_copy(queue,zImage,zImageCL).wait()
    cl.enqueue_copy(queue,fImage,fImageCL).wait()
    if Method=='TrajectoPixel' or Method=='TrajectoCircle':
        cl.enqueue_copy(queue,Trajectories,TrajectoriesCL).wait()
        cl.enqueue_copy(queue,IdLast,IdLastCL).wait()
    elapsed = time.time()-start_time
    print("\nCompute Time : %f" % compute)
    print("Elapsed Time : %f\n" % elapsed)

    zMaxPosition=numpy.where(zImage[:,:]==zImage.max())
    fMaxPosition=numpy.where(fImage[:,:]==fImage.max())
    print("Z max @(%f,%f) : %f" % ((1.*zMaxPosition[1][0]/zImage.shape[1]-0.5,1.*zMaxPosition[0][0]/zImage.shape[0]-0.5,zImage.max())))
    print("Flux max @(%f,%f) : %f" % ((1.*fMaxPosition[1][0]/fImage.shape[1]-0.5,1.*fMaxPosition[0][0]/fImage.shape[0]-0.5,fImage.max())))
    zImageCL.release()
    fImageCL.release()

    if Method=='TrajectoPixel' or Method=='TrajectoCircle':
        if not NoImage:
            AngleStep=4*numpy.pi/TrackPoints
            Angles=numpy.arange(0.,4*numpy.pi,AngleStep)
            Angles.shape=(1,TrackPoints)
            Hostname=gethostname()
            Date=time.strftime("%Y%m%d_%H%M%S")
            ImageInfo="%s_Device%i_%s_%s" % (Method,Device,Hostname,Date)
            
            if TrackSave:
                # numpy.savetxt("TrouNoirTrajectories_%s.csv" % ImageInfo,
                #               numpy.transpose(numpy.concatenate((Angles,Trajectories),axis=0)),
                #               delimiter=' ', fmt='%.2e')
                numpy.savetxt("TrouNoirTrajectories.csv",
                            numpy.transpose(numpy.concatenate((Angles,Trajectories),axis=0)),delimiter=' ', fmt='%.2e')
        
        TrajectoriesCL.release()
        IdLastCL.release()
        
    return(elapsed)

def BlackHoleCUDA(zImage,fImage,InputCL):

    kernel_params = {}

    Device=InputCL['Device']
    GpuStyle=InputCL['GpuStyle']
    VariableType=InputCL['VariableType']
    Size=InputCL['Size']
    Mass=InputCL['Mass']
    InternalRadius=InputCL['InternalRadius']
    ExternalRadius=InputCL['ExternalRadius']
    Angle=InputCL['Angle']
    Method=InputCL['Method']
    TrackPoints=InputCL['TrackPoints']
    Physics=InputCL['Physics']
    Threads=InputCL['Threads']

    PhysicsList=DictionariesAPI()
    
    if InputCL['BlackBody']:
        # Spectrum is Black Body one
        Line=0
    else:
        # Spectrum is Monochromatic Line one
        Line=1

    Trajectories=numpy.zeros((int(InputCL['Size']/2),InputCL['TrackPoints']),dtype=numpy.float32)
    IdLast=numpy.zeros(int(InputCL['Size']/2),dtype=numpy.int32)

    try:
        # For PyCUDA import
        import pycuda.driver as cuda
        from pycuda.compiler import SourceModule
        
        cuda.init()
        for Id in range(cuda.Device.count()):
            if Id==Device:
                XPU=cuda.Device(Id)
                print("GPU selected %s" % XPU.name())
        print

    except ImportError:
        print("Platform does not seem to support CUDA")

    Context=XPU.make_context()

    try:
        mod = SourceModule(KernelCodeCuda(),options=['--compiler-options','-DPHYSICS=%i -DSETTRACKPOINTS=%i' % (PhysicsList[Physics],TrackPoints)])
        print("Compilation seems to be OK")
    except:
        print("Compilation seems to break")

    EachPixelCU=mod.get_function("EachPixel")
    OriginalCU=mod.get_function("Original")
    EachCircleCU=mod.get_function("EachCircle")
    TrajectoryCU=mod.get_function("Trajectory")
    PixelCU=mod.get_function("Pixel")
    CircleCU=mod.get_function("Circle")
    
    TrajectoriesCU = cuda.mem_alloc(Trajectories.size*Trajectories.dtype.itemsize)
    cuda.memcpy_htod(TrajectoriesCU, Trajectories)
    zImageCU = cuda.mem_alloc(zImage.size*zImage.dtype.itemsize)
    cuda.memcpy_htod(zImageCU, zImage)
    fImageCU = cuda.mem_alloc(fImage.size*fImage.dtype.itemsize)
    cuda.memcpy_htod(zImageCU, fImage)
    IdLastCU = cuda.mem_alloc(IdLast.size*IdLast.dtype.itemsize)
    cuda.memcpy_htod(IdLastCU, IdLast)

    start_time=time.time()

    if Method=='EachPixel':
        EachPixelCU(zImageCU,fImageCU,
                    numpy.float32(Mass),
                    numpy.float32(InternalRadius),
                    numpy.float32(ExternalRadius),
                    numpy.float32(Angle),
                    numpy.int32(Line),
                    grid=(int(zImage.shape[0]/Threads),
                          int(zImage.shape[1]/Threads)),
                    block=(Threads,Threads,1))
    elif Method=='EachCircle':
        EachCircleCU(zImageCU,fImageCU,
                   numpy.float32(Mass),
                   numpy.float32(InternalRadius),
                   numpy.float32(ExternalRadius),
                   numpy.float32(Angle),
                   numpy.int32(Line),
                   grid=(int(zImage.shape[0]/Threads/2),1),
                   block=(Threads,1,1))
    elif Method=='Original':
        OriginalCU(zImageCU,fImageCU,
                   numpy.uint32(zImage.shape[0]/2),
                   numpy.float32(Mass),
                   numpy.float32(InternalRadius),
                   numpy.float32(ExternalRadius),
                   numpy.float32(Angle),
                   numpy.int32(Line),
                   grid=(1,1),
                   block=(1,1,1))
    elif Method=='TrajectoCircle':
        TrajectoryCU(TrajectoriesCU,IdLastCU,
                     numpy.float32(Mass),
                     numpy.float32(InternalRadius),
                     numpy.float32(ExternalRadius),
                     numpy.float32(Angle),
                     numpy.int32(Line),
                     grid=(int(Trajectories.shape[0]/Threads),1),
                     block=(Threads,1,1))
        
        CircleCU(TrajectoriesCU,IdLastCU,zImageCU,fImageCU,
                 numpy.float32(Mass),
                 numpy.float32(InternalRadius),
                 numpy.float32(ExternalRadius),
                 numpy.float32(Angle),
                 numpy.int32(Line),
                 grid=(int(Trajectories.shape[0]/Threads),
                       int(zImage.shape[0]*4/Threads)),
                 block=(Threads,Threads,1))
    else:
        # Default method: TrajectoPixel
        TrajectoryCU(TrajectoriesCU,IdLastCU,
                     numpy.float32(Mass),
                     numpy.float32(InternalRadius),
                     numpy.float32(ExternalRadius),
                     numpy.float32(Angle),
                     numpy.int32(Line),
                     grid=(int(Trajectories.shape[0]/Threads),1),
                     block=(Threads,1,1))
        
        PixelCU(zImageCU,fImageCU,TrajectoriesCU,IdLastCU,
                numpy.uint32(Trajectories.shape[0]),
                numpy.float32(Mass),
                numpy.float32(InternalRadius),
                numpy.float32(ExternalRadius),
                numpy.float32(Angle),
                numpy.int32(Line),
                grid=(int(zImage.shape[0]/Threads),
                      int(zImage.shape[1]/Threads),1),
                block=(Threads,Threads,1))

    Context.synchronize()

    compute = time.time()-start_time

    cuda.memcpy_dtoh(zImage,zImageCU)
    cuda.memcpy_dtoh(fImage,fImageCU)
    if Method=='TrajectoPixel' or Method=='TrajectoCircle':
        cuda.memcpy_dtoh(Trajectories,TrajectoriesCU)
    elapsed = time.time()-start_time
    print("\nCompute Time : %f" % compute)
    print("Elapsed Time : %f\n" % elapsed)

    zMaxPosition=numpy.where(zImage[:,:]==zImage.max())
    fMaxPosition=numpy.where(fImage[:,:]==fImage.max())
    print("Z max @(%f,%f) : %f" % ((1.*zMaxPosition[1][0]/zImage.shape[1]-0.5,1.*zMaxPosition[0][0]/zImage.shape[0]-0.5,zImage.max())))
    print("Flux max @(%f,%f) : %f" % ((1.*fMaxPosition[1][0]/fImage.shape[1]-0.5,1.*fMaxPosition[0][0]/fImage.shape[0]-0.5,fImage.max())))

    
    Context.pop()

    Context.detach()

    if Method=='TrajectoPixel' or Method=='TrajectoCircle':
        if not NoImage:
            AngleStep=4*numpy.pi/TrackPoints
            Angles=numpy.arange(0.,4*numpy.pi,AngleStep)
            Angles.shape=(1,TrackPoints)
            Hostname=gethostname()
            Date=time.strftime("%Y%m%d_%H%M%S")
            ImageInfo="%s_Device%i_%s_%s" % (Method,Device,Hostname,Date)
            
            # numpy.savetxt("TrouNoirTrajectories_%s.csv" % ImageInfo,
            #               numpy.transpose(numpy.concatenate((Angles,Trajectories),axis=0)),
            #               delimiter=' ', fmt='%.2e')
            numpy.savetxt("TrouNoirTrajectories.csv",
                          numpy.transpose(numpy.concatenate((Angles,Trajectories),axis=0)),
                          delimiter=' ', fmt='%.2e')

    
    return(elapsed)

if __name__=='__main__':

    # Default device: first one!
    Device=0
    # Default implementation: OpenCL, most versatile!
    GpuStyle = 'OpenCL'
    Mass = 1.
    # Internal Radius 3 times de Schwarzschild Radius
    InternalRadius=6.*Mass
    #
    ExternalRadius=12.
    #
    # Angle with normal to disc 10 degrees
    Angle = numpy.pi/180.*(90.-10.)
    # Radiation of disc : BlackBody or Monochromatic
    BlackBody = False
    # Size of image
    Size=1024
    # Variable Type
    VariableType='FP32'
    # ?
    q=-2
    # Method of resolution
    Method='TrajectoPixel'
    # Colors for output image
    Colors='Greyscale'
    # Physics
    Physics='Einstein'
    # No output as image
    NoImage = False
    # Threads in CUDA
    Threads = 32
    # Trackpoints of trajectories
    TrackPoints=2048
    # Tracksave of trajectories
    TrackSave=False
    
    HowToUse='%s -h [Help] -b [BlackBodyEmission] -j [TrackSave] -n [NoImage] -p <Einstein/Newton> -s <SizeInPixels> -m <Mass> -i <DiscInternalRadius> -x <DiscExternalRadius> -a <AngleAboveDisc> -d <DeviceId> -c <Greyscale/Red2Yellow> -g <CUDA/OpenCL> -o <EachPixel/TrajectoCircle/TrajectoPixel/EachCircle/Original> -t <ThreadsInCuda> -v <FP32/FP64> -k <TrackPoints>'

    try:
        opts, args = getopt.getopt(sys.argv[1:],"hbnjs:m:i:x:a:d:g:v:o:t:c:p:k:",["tracksave","blackbody","noimage","camera","size=","mass=","internal=","external=","angle=","device=","gpustyle=","variabletype=","method=","threads=","colors=","physics=","trackpoints="])
    except getopt.GetoptError:
        print(HowToUse % sys.argv[0])
        sys.exit(2)

    # List of Devices
    Devices=[]
    Alu={}
        
    for opt, arg in opts:
        if opt == '-h':
            print(HowToUse % sys.argv[0])
            
            print("\nInformations about devices detected under OpenCL API:")
            # For PyOpenCL import
            try:
                import pyopencl as cl
                Id=0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        #deviceType=cl.device_type.to_string(device.type)
                        deviceType="xPU"
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
                        Id=Id+1
                        
            except:
                print("Your platform does not seem to support OpenCL")
                
            print("\nInformations about devices detected under CUDA API:")
                # For PyCUDA import
            try:
                import pycuda.driver as cuda
                cuda.init()
                for Id in range(cuda.Device.count()):
                    device=cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id,device.name()))
                print
            except:
                print("Your platform does not seem to support CUDA")
        
            sys.exit()
        
        elif opt in ("-d", "--device"):
#            Devices.append(int(arg))
            Device=int(arg)
        elif opt in ("-g", "--gpustyle"):
            GpuStyle = arg
        elif opt in ("-v", "--variabletype"):
            VariableType = arg
        elif opt in ("-s", "--size"):
            Size = int(arg)
        elif opt in ("-k", "--trackpoints"):
            TrackPoints = int(arg)
        elif opt in ("-m", "--mass"):
            Mass = float(arg)
        elif opt in ("-i", "--internal"):
            InternalRadius = float(arg)
        elif opt in ("-e", "--external"):
            ExternalRadius = float(arg)
        elif opt in ("-a", "--angle"):
            Angle = numpy.pi/180.*(90.-float(arg))
        elif opt in ("-b", "--blackbody"):
            BlackBody = True
        elif opt in ("-j", "--tracksave"):
            TrackSave = True
        elif opt in ("-n", "--noimage"):
            NoImage = True
        elif opt in ("-o", "--method"):
            Method = arg
        elif opt in ("-t", "--threads"):
            Threads = int(arg)
        elif opt in ("-c", "--colors"):
            Colors = arg
        elif opt in ("-p", "--physics"):
            Physics = arg

    print("Device Identification selected : %s" % Device)
    print("GpuStyle used : %s" % GpuStyle)
    print("VariableType : %s" % VariableType)
    print("Size : %i" % Size)
    print("Mass : %f" % Mass)
    print("Internal Radius : %f" % InternalRadius)
    print("External Radius : %f" % ExternalRadius)
    print("Angle with normal of (in radians) : %f" % Angle)
    print("Black Body Disc Emission (monochromatic instead) : %s" % BlackBody)
    print("Method of resolution : %s" % Method)
    print("Colors for output images : %s" % Colors)
    print("Physics used for Trajectories : %s" % Physics)
    print("Trackpoints of Trajectories : %i" % TrackPoints)
    print("Tracksave of Trajectories : %i" % TrackSave)

    if GpuStyle=='CUDA':
        print("\nSelection of CUDA device") 
        try:
            # For PyCUDA import
            import pycuda.driver as cuda
            
            cuda.init()
            for Id in range(cuda.Device.count()):
                device=cuda.Device(Id)
                print("Device #%i of type GPU : %s" % (Id,device.name()))
                if Id in Devices:
                    Alu[Id]='GPU'
            
        except ImportError:
            print("Platform does not seem to support CUDA")

    if GpuStyle=='OpenCL':
        print("\nSelection of OpenCL device") 
        try:
            # For PyOpenCL import
            import pyopencl as cl
            Id=0
            for platform in cl.get_platforms():
                for device in platform.get_devices():
                    #deviceType=cl.device_type.to_string(device.type)
                    deviceType="xPU"
                    print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip()))

                    if Id in Devices:
                    # Set the Alu as detected Device Type
                        Alu[Id]=deviceType
                    Id=Id+1
        except ImportError:
            print("Platform does not seem to support OpenCL")


    zImage=numpy.zeros((Size,Size),dtype=numpy.float32)
    fImage=numpy.zeros((Size,Size),dtype=numpy.float32)

    InputCL={}
    InputCL['Device']=Device
    InputCL['GpuStyle']=GpuStyle
    InputCL['VariableType']=VariableType
    InputCL['Size']=Size
    InputCL['Mass']=Mass
    InputCL['InternalRadius']=InternalRadius
    InputCL['ExternalRadius']=ExternalRadius
    InputCL['Angle']=Angle
    InputCL['BlackBody']=BlackBody
    InputCL['Method']=Method
    InputCL['TrackPoints']=TrackPoints
    InputCL['Physics']=Physics
    InputCL['Threads']=Threads
    InputCL['NoImage']=NoImage
    InputCL['TrackSave']=TrackSave

    if GpuStyle=='OpenCL':
        duration=BlackHoleCL(zImage,fImage,InputCL)
    else:
        duration=BlackHoleCUDA(zImage,fImage,InputCL)
        
    Hostname=gethostname()
    Date=time.strftime("%Y%m%d_%H%M%S")
    ImageInfo="%s_Device%i_%s_%s" % (Method,Device,Hostname,Date)
    
    
    if not NoImage:
        ImageOutput(zImage,"TrouNoirZ_%s" % ImageInfo,Colors)
        ImageOutput(fImage,"TrouNoirF_%s" % ImageInfo,Colors)