#include <gincsvd.hpp>
#include <algorithm>
#include <iostream>
#include <iterator>
#include <iomanip>
#include <string>
#include <vector>
#include <cerrno>
#include <climits>
#include <cstdlib>
#include <cula.hpp>
#include <ctime>
#include <unistd.h>
#include <getopt.h>
using namespace GINCSVD;
using std::cout;
using std::endl;
using std::string;

// Scalar element type used throughout this driver: double when the
// GINCSVD_DOUBLE macro is defined at compile time, float otherwise.
#ifdef GINCSVD_DOUBLE
typedef double T;
#else
typedef float T;
#endif

/**
 * Options collected from the command line.
 *
 * Every field starts at zero so that a default-constructed instance never
 * exposes indeterminate values; callers overwrite the fields they care about
 * before (or while) parsing.
 */
struct CmdLineArgs {
  int M;        ///< --num-rows:   rows in the test matrix
  int N;        ///< --num-cols:   columns in the test matrix
  int K;        ///< --num-sigma:  number of singular values to compute
  int L;        ///< --block-size: columns processed per iteration
  int verbose;  ///< set to 1 by --verbose, 0 by --quiet

  CmdLineArgs() : M(0), N(0), K(0), L(0), verbose(0) {}
};

/**
 * Parse a strictly positive decimal integer that fits in an int.
 *
 * @param text  NUL-terminated option argument (may be NULL).
 * @param out   Receives the parsed value; left untouched on failure.
 * @return true when the whole string is a valid integer in [1, INT_MAX].
 */
static bool parsePositiveInt(const char *text, int &out)
{
  if (text == NULL || *text == '\0') {
    return false;
  }
  char *end = NULL;
  errno = 0;
  const long value = strtol(text, &end, 10);
  // Reject overflow, trailing junk ("10abc"), and non-positive sizes.
  if (errno != 0 || *end != '\0' || value <= 0 || value > INT_MAX) {
    return false;
  }
  out = static_cast<int>(value);
  return true;
}

/**
 * Fill `args` from the command line using getopt_long.
 *
 * Recognised options: --help/-h, --verbose/-v, --quiet/-q,
 * --num-rows/-M, --num-cols/-N, --num-sigma/-K, --block-size/-L.
 * Fields of `args` that are not mentioned on the command line are left as
 * the caller set them.
 *
 * @return 0 on success; -1 when help was requested, an unknown option was
 *         seen, or a numeric argument is not a positive integer. The caller
 *         is expected to print usage on a non-zero return.
 */
int processCmdLine(int argc, char ** argv, CmdLineArgs &args)
{
  // Deliberately NOT static: two entries store &args.verbose, and a static
  // table would stay bound to whichever `args` was passed on the first call.
  struct option long_options[] = {
    {"help",            no_argument,        NULL,           'h'},
    {"verbose",         no_argument,        &args.verbose,  1},
    {"quiet",           no_argument,        &args.verbose,  0},
    {"num-rows",        required_argument,  NULL,           'M'},
    {"num-cols",        required_argument,  NULL,           'N'},
    {"num-sigma",       required_argument,  NULL,           'K'},
    {"block-size",      required_argument,  NULL,           'L'},
    {NULL, 0, NULL, 0}  // terminator: has_arg and val are ints, not pointers
  };

  for (;;) {
    int opt_ind = 0;
    const int c = getopt_long(argc, argv, "hvqM:N:K:L:", long_options, &opt_ind);
    if (c == -1) {
      break;
    }

    int *target = NULL;  // numeric field selected by this option, if any
    switch (c)
    {
      case 0: /* long option that stored its value through a flag pointer */
        break;
      case 'v':
        args.verbose = 1;
        break;
      case 'q':
        args.verbose = 0;
        break;
      case 'M':
        target = &args.M;
        break;
      case 'N':
        target = &args.N;
        break;
      case 'K':
        target = &args.K;
        break;
      case 'L':
        target = &args.L;
        break;
      case 'h':
      case '?':
      default:
        return -1;
    }

    if (target != NULL && !parsePositiveInt(optarg, *target)) {
      std::cerr << argv[0] << ": option -" << static_cast<char>(c)
                << " expects a positive integer, got '"
                << (optarg ? optarg : "") << "'" << std::endl;
      return -1;
    }
  }
  return 0;
}

int main(int argc, char **argv) {
  //
  CmdLineArgs args;
  args.M = 10000;
  args.N =   800;
  args.K =     5;
  args.L =     5;
  if (processCmdLine(argc,argv,args)) {
    cout << argv[0] << " arguments:\n"
          "  --verbose          verbose output\n"
          "  --quiet            no output\n"
          "  --num-rows M       M rows in test matrix\n"
          "  --num-cols N       N columns in test matrix\n"
          "  --num-sigma K      K singular values computed\n"
          "  --block-size L     L columns processed per iteration\n"
          "  --notest-cula      Do not test against CULA host interface (default)\n"
          "  --test-cula        Test against CULA host interface\n"
         << endl;
    return -1;
  }
  //
  cout << "\n"
          "       /------------------------\\\n"
          "       | G-IncSVD rand() driver |\n"
          "       \\------------------------/\n\n";
  const int M = args.M;
  const int N = args.N;
  const int K = args.K;
  const int L = args.L;
  GINCSVD::init(0);
  //
  T *U  = new T[M*K],
    *VT = new T[K*N],
    *S  = new T[K];

  cout << "\n";

  cout << std::scientific << std::setprecision(8);

  cout << "Matrix: rand(" << M << "," << N << ")" << endl;
  cout << endl;

  cout << "-------------------------------\n"
       << "------ Synchronous tests ------\n"
       << "-------------------------------\n\n";

  cout << "Computing LARGEST " << K << " singular values, with block size " << L << endl;
  GINCSVD::timing_info sync = gincsvd_gen_sync(LARGEST,M,N,K,L,RandomMatrixGenerator<T>(M),U,VT,S);
  cout << "Total time  : " << sync.total_time/1e3 << " seconds" << endl;
  cout << "Push A      : " << sync.t_push/1e3 << " seconds (over " << sync.numPush << " xfers)" << endl;
  cout << "Pull UV time: " << sync.t_pullUV/1e3 << " seconds" << endl;
  cout << "Observed send bandwidth: " << (double)(M*N*sizeof(T))/sync.t_push/1024/1024 << " GB/sec" << endl;
  // cout << "Estimated largest singular values: ";
  // std::copy(S,S+K,std::ostream_iterator<T>(cout,"\t")); cout << endl;

  cout << "\n";

  cout << "-------------------------------\n"
       << "----- Asynchronous tests ------\n"
       << "-------------------------------\n\n";

  cout << "Computing LARGEST " << K << " singular values, with block size " << L << endl;
  GINCSVD::timing_info async = gincsvd_gen(LARGEST,M,N,K,L,RandomMatrixGenerator<T>(M),U,VT,S);
  cout << "Total time  : " << async.total_time/1e3 << " seconds" << endl;
  cout << "Push A      : " << async.t_push/1e3 << " seconds (over " << async.numPush << " xfers)" << endl;
  cout << "GEMM overlap: " << async.t_expand/1e3 << " seconds" << endl;
  cout << "Pull UV time: " << async.t_pullUV/1e3 << " seconds" << endl;
  cout << "Observed send bandwidth: " << (double)(M*N*sizeof(T))/async.t_push/1024/1024 << " GB/sec" << endl;
  // cout << "Estimated largest singular values: ";
  // std::copy(S,S+K,std::ostream_iterator<T>(cout,"\t")); cout << endl;

  cout << "\n";

  delete [] U;
  delete [] VT;
  delete [] S;

  return 0;
}
