/*
COPYRIGHT (2011-2012) by:
Kevin Marco Erler (author), http://www.kevinerler.de
AIU-FSU Jena (co-owner), http://www.astro.uni-jena.de
SBSZ Jena-Göschwitz (co-owner), http://www.sbsz-jena.de
BSZ-Hermsdorf (co-owner), http://www.bszh.de
Advanced Licensing (dual license: COPYRIGHT and following licenses):
License (international): CC-BY v3.0-unported or later - link: http://creativecommons.org/licenses/by/3.0/deed.en
License (Germany):       CC-BY v3.0-DE       or later - link: http://creativecommons.org/licenses/by/3.0/de/
------------------
Compilation requirements:
Packages (x86-64):
  GCC >v4.2, compat. libstdc++ and GOMP v3.0
Normal-Compile with g++-Compiler (Red Hat GCC 4.4.5-6 x86-64 tested) + OpenMP v3.0 ([lib]GOMP v3.0 x86-64 tested)
  g++ -std=c++0x -m64 -fopenmp -Wall -Wextra -pedantic -pedantic-errors -lgomp -lm -s <source.cpp> -o <dest>
Release-Compile with g++-Compiler (Red Hat GCC 4.4.5-6 x86-64 tested) + OpenMP v3.0 ([lib]GOMP v3.0 x86-64 tested)
  g++ -std=c++0x -m64 -fopenmp -Wall -Wextra -pedantic -pedantic-errors -lgomp -lm -O3 -s <source.cpp> -o <dest>
Debug-Compile with g++-Compiler (Red Hat GCC 4.4.5-6 x86-64 tested) + OpenMP v3.0 ([lib]GOMP v3.0 x86-64 tested)
  g++ -std=c++0x -m64 -fopenmp -Wall -Wextra -pedantic -pedantic-errors -lgomp -lm -g -ggdb3 <source.cpp> -o <dest>
*/

// Includes of C/C++-Librarys for INTs, REAL/FLOATs, STRINGS, Math-Calc and I/O
#include <climits>
#include <cstdint>
#include <cinttypes>
#include <cfloat>
#include <cwchar>
#include <string>  //std:string
#include <string.h>
#include <cstring>
#include <cstdlib>
#include <cstdio>
#include <iostream>
#include <sstream>
#include <iomanip>
#include <cmath>
#include <ctime>
#include <vector>
#include <algorithm>

// Conditional compilation (conditional include) of the OpenMP-Mainlib for OpenMP-Support
#ifdef _OPENMP
#include <omp.h>
#endif

using namespace std;

#define free(x) free(x); *x=NULL
#define PRId128 "s"
#define PRIi128 "s"
#define PRIu128 "s"

const uint64_t UINT64_MIN     = 0;
const __int128_t INT128_MIN   = (__int128_t)((-170141183460469231731.687303715884105728) * pow(10,18));
const __int128_t INT128_MAX   = (__int128_t)(( 170141183460469231731.687303715884105727) * pow(10,18));
const __uint128_t UINT128_MAX = (__uint128_t)((340282366920938463463.374607431768211455) * pow(10,18));
const __uint128_t UINT128_MIN = 0/* * pow(10,18)*/;

std::ostream &operator<<(std::ostream &out, __uint128_t x)
{
  if(x >= 10)
  {
    out << x / 10;
  }
  return out << static_cast<unsigned>(x % 10);
}

std::ostream &operator<<(std::ostream &out, __int128_t x)
{
  if(x < 0)
  {
    out << '-';
    x = -x;
  }
  return out << static_cast<__uint128_t>(x);
}

string INT128ToSTR(__int128_t x)
{
  std::stringstream sstr;
  sstr<<x;
  return sstr.str();
}
#define INT128ToCSTR(x) (INT128ToSTR(x)).c_str()

string UINT128ToSTR(__uint128_t x)
{
  std::stringstream sstr;
  sstr<<x;
  return sstr.str();
}
#define UINT128ToCSTR(x) (UINT128ToSTR(x)).c_str()

const uint64_t datarsize = 1000000000ULL;

int main(int argc, char *argv[])
{
  // Runtime manipulation of OpenMP-state variables
  //omp_set_num_threads(4);
  omp_set_dynamic(0);
  //omp_set_nested(1);

  /* data declarations and implementations
     DB-arrays requires a FILE-Input-database (file based) of datarsize (N) elements (mtype uint64_t) */
  FILE *FileVar;
  double starttime = 0.00, sdelay = 0.00, pdelay = 0.00;
  uint64_t db1[datarsize] = {0ULL},  //db1 for Single-Search
           db2[datarsize] = {0ULL},  //db2 for Parallel-Search
           SearchStr = 0ULL, 
           NumResults_s = 0ULL, NumResults_p = 0ULL,
           max = 0ULL, min = (0ULL-1ULL);
  std::vector<uint64_t> found_s(0), found_p(0);
  bool CorrectFiltered = true;

  std::cout << "Search 1                                                   (64-Bit)\n"
            << "===================================================================\n"
            << "DB einlesen:";

  //--------------------------Begin: Initialization of data------------------------------------------

  if((FileVar = fopen("db1.bin","rb"))!=NULL)
  {
    fread(&db1,sizeof(db1),1,FileVar);
    fseek(FileVar,0,SEEK_SET);
    fread(&db2,sizeof(db2),1,FileVar);
    fclose(FileVar);
  }
  else
  {
    std::cout << "Fehler beim Öffnen/Einlesen der DB-Datei!\n";
    return 0;
  }
  std::cout << "                                                   done\n"
            << "Ermittle Minimum und Maximum:";
  for(uint64_t m=0ULL;m<datarsize;++m)
  {
    if(min>db1[m])
    {
      min = db1[m];
    }
    if(max<db1[m])
    {
      max = db1[m];
    }
  }

  //--------------------------End: Initialization of data--------------------------------------------

  std::cout << "                                  done\n"
            << "Eingabe Search-String (Typ: UINT64): ";

  //--------------------------Begin: CPU-serial execution of algorithm-------------------------------

  std::cin >> SearchStr;
  std::cout << "SERIELLE AUSFÜHRUNG:";
  starttime = omp_get_wtime();
  //CPU-serial algorithm:
  for(uint64_t i=0ULL;i<datarsize;++i)
  {
    if(SearchStr == db1[i])
    {
      found_s.push_back(i);
      ++NumResults_s;
    }
  }
  sdelay = omp_get_wtime()-starttime;
  std::cout << "                                           done\n"; //serial

  //--------------------------End: CPU-serial execution of algorithm---------------------------------

  //--------------------------Begin: CPU-parallel OpenMP-execution of algorithm----------------------

  // create parallel region:
  #pragma omp parallel default(none) shared(std::cout, starttime, pdelay, SearchStr, db2, NumResults_p, found_p)
  {
    #pragma omp master
    {
      std::cout << "PARALLELE AUSFÜHRUNG mit " << omp_get_num_threads() << " Threads:";
      starttime = omp_get_wtime();
    }

    //OpenMP-CPU-parallel algorithm with the Reduction-clause:
    #pragma omp for schedule(static) reduction(+: NumResults_p)
    for(uint64_t j=0ULL;j<datarsize;++j)
    {
      if(SearchStr == db2[j])
      {
        #pragma omp critical
        {
          found_p.push_back(j);
        }
        ++NumResults_p;
      }
    }

    #pragma omp master
    {
      pdelay = omp_get_wtime()-starttime;
      if(omp_get_num_threads() >= 10)
      {
        std::cout << "                           done\n";  //parallel
      }
      else
      {
        std::cout << "                            done\n"; //parallel
      }
    }
  }

  //--------------------------End: CPU-parallel OpenMP-execution of algorithm------------------------

  //--------------------------Analysis of results----------------------------------------------------

  std::cout << "Überprüfe Ergebnisse:";
  if(((found_s.size()!=0ULL) && (found_p.size()!=0ULL)) && (found_s.size() == found_p.size()))
  {
    std::sort(found_p.begin(),found_p.end());
    for(uint64_t t=0ULL;t<found_p.size();++t)
    {
      if(found_p[t] != found_s[t])
      {
        CorrectFiltered = false;
        break;
      }
    }
  }
  std::cout << "                                          done\n";

  std::cout << "\nAuswertung:\n"
            << "*******************************************************************\n"
            << "DB-Größe:                     " << sizeof(db1) << " Bytes\n"
            << "Anzahl Elemente:              " << datarsize << " zu je " << sizeof(uint64_t) << " Bytes\n"
            << "Minimum:                      " << min << '\n'
            << "Maximum:                      " << max << '\n'
            << "Searchstring:                 " << SearchStr << '\n'
            << "Anzahl gefundener S-Elemente: " << NumResults_s << '\n'
            << "Anzahl gefundener P-Elemente: " << NumResults_p << '\n'
            << "Seriell & parallel richtig gefiltert?:                          " << ((CorrectFiltered==true)?"yes\n":" no\n")
            << "Dauer - SERIELL:              " << sdelay << " sec\n"
            << "Dauer - PARALLEL:             " << pdelay  << " sec\n"
            << "__________________\n"
            << "==>seriell/parallel gefilterte Daten:\n";
  if((found_s.size()!=0ULL) && (found_p.size()!=0ULL))
  {
    if((found_s.size()>1ULL) && (found_p.size()>1ULL))
    {
      std::cout << "S-Suchergebnis 1: "
                << (found_s[0]+1ULL) << ".S-Element[" << found_s[0] << "] = " << db1[found_s[0]] << '\n'
                << "P-Suchergebnis 1: "
                << (found_p[0]+1ULL) << ".P-Element[" << found_p[0] << "] = " << db2[found_p[0]] << '\n'
                << "...\n"
                << "S-Suchergebnis " << found_s.size() << ": "
                << (found_s.back()+1ULL) << ".S-Element[" << found_s.back() << "] = " << db1[found_s.back()] << '\n'
                << "P-Suchergebnis " << found_p.size() << ": "
                << (found_p.back()+1ULL) << ".P-Element[" << found_p.back() << "] = " << db2[found_p.back()] << '\n';
    }
    else
    {
      std::cout << "S-Element[" << found_s[0] << "] = " << db1[found_s[0]] << '\n'
                << "P-Element[" << found_p[0] << "] = " << db2[found_p[0]] << '\n';
    }
  }
  else
  {
    std::cout << "Suchstring kommt nicht in der DB vor!\n";
  }
  std::cout << "__________________"
            << "\n64-Bit-Werte:\n"
            << "INT64_MIN:                                    "  << INT64_MIN << '\n'
            << "INT64_MAX:                                     " << INT64_MAX << '\n'
            << "UINT64_MIN:                                    " << UINT64_MIN << '\n'
            << "UINT64_MAX:                                    " << UINT64_MAX << '\n';

  // Detailed output
  std::cout << "\nGefilterte Daten:\n"
            << "==========================\n"
            << "Anzahl gefundener S-Elemente: " << NumResults_s << '\n'
            << "Anzahl gefundener P-Elemente: " << NumResults_p << '\n';
  if((found_s.size()!=0ULL) && (found_p.size()!=0ULL))
  {
    for(uint64_t l=0ULL;l<found_p.size();++l)
    {
      std::cout << "S-Suchergebnis " << (l+1ULL) << ": "
                << (found_s[l]+1ULL) << ".S-Element[" << found_s[l] << "] = " << db1[found_s[l]] << '\n'
                << "P-Suchergebnis " << (l+1ULL) << ": "
                << (found_p[l]+1ULL) << ".P-Element[" << found_p[l] << "] = " << db2[found_p[l]] << '\n';
    }
  }

  found_s.clear();
  found_p.clear();

  getchar();
  return 0;
}