ert icon indicating copy to clipboard operation
ert copied to clipboard

Implement POC for using the C-driver API

Open xjules opened this issue 1 year ago • 1 comments

Make use of the provided OpenPBS C library to supplement the current calls to qstat and see how the main server reacts, i.e. check authentication and communication between the ert server and the PBS server.

xjules avatar Mar 19 '24 12:03 xjules

Made a proof of concept using C code that was several orders of magnitude faster than qstat when many job ids were used. Together with Eivind we found that there is an option (-e) for qstat that gives most of the speedup. With a few hundred job ids, the PoC took about 1/5 of the time that qstat -e used.

main.cpp

#include <sstream>
#include <string>
#include <optional>
#include <thread>
#include <chrono>
#include <iostream>
#include <unistd.h>
#include <string.h>

#include "/opt/pbs/include/pbs_ifl.h"
#include "/opt/pbs/include/pbs_error.h"

#include "third-party/date/date.h"

using namespace std::chrono;
using namespace std::chrono_literals;

using std::cout;
using std::endl;

// Run-time settings of the tool. parse_args() fills them in from the command
// line, connect() stores the connection handle here.
struct configuration
{
  // Server name handed to pbs_connect(); overridden by the -s option.
  std::string server{"s034-lcam"};
  // Identifier string handed to the pbs_stat* calls; nullptr until parsed.
  char* ids{nullptr};
  // Handle returned by pbs_connect(); -1 until connect() has been called.
  int connection{-1};
  // Polling period of the main loop, in milliseconds.
  std::chrono::duration<int, std::milli> rep_time{std::chrono::milliseconds{10000}};
};

// The single, process-wide configuration instance.
configuration conf{};

// Print a linked list of status entries to stdout, one line per entry.
//
// Each line holds the entry's name followed by the value of every attribute
// attached to it, all separated (and terminated) by a single space.
//
// batch: head of the list; a null pointer prints nothing.
void print_batch_status(batch_status* batch)
{
  // Streaming a null char* into an ostream is undefined behaviour (and in
  // practice sets badbit, silencing all later output), so print "" instead.
  const auto text = [](const char* s) { return s ? s : ""; };

  for(auto status = batch; status; status = status->next)
  {
    cout << text(status->name) << " ";
    for(auto attr = status->attribs; attr; attr = attr->next)
    {
      cout << text(attr->value) << " ";
    }
    cout << endl;
  }
}

// Connect to the server named in conf.server.
//
// The handle returned by pbs_connect() is stored in conf.connection and also
// returned to the caller. A negative handle is reported on stdout together
// with pbs_errno, but the function does not abort; pbs_server is printed in
// either case.
int connect()
{
  const int handle = pbs_connect(conf.server.c_str());
  conf.connection = handle;

  if(handle < 0)
  {
    cout << "an error occured when connecting to " << conf.server << endl
         << "errno: " << pbs_errno << endl;
  }

  cout << pbs_server << endl;
  return handle;
}

void queue_status()
{
  auto batch_status = pbs_statque(conf.connection, conf.ids, nullptr, nullptr);
  print_batch_status(batch_status);
  pbs_statfree(batch_status);  
}

void jobs_status(const char* extend = "x")
{
  static attrl exit_attrib{nullptr, "Exit_status", nullptr, nullptr};
  static attrl attrib_filter{&exit_attrib, "job_state", nullptr, nullptr};

  auto batch_status = pbs_statjob(conf.connection, conf.ids, &attrib_filter, extend);
  if(!batch_status)
  {
    cout << "an error occured when querying job statuses" << endl;
    cout << "errno: " << pbs_errno << endl;
    connect();
  }
  cout << endl << "printing job statuses:" << endl;
  print_batch_status(batch_status);
  pbs_statfree(batch_status);
}

// Write the usage summary to stdout and terminate the process with exit
// code -1. Never returns.
void print_help_and_exit()
{
  cout << "USAGE:" << endl
       << "  qstat [OPTIONS...] IDS" << endl
       << "  qstat [OPTIONS...] -f path_to_file_with_ids" << endl;
  exit(-1);
}

// Parse the command line into the global `conf`.
//
// Options:
//   -f ARG   use ARG as the identifier string (stored in conf.ids as is).
//            NOTE(review): the usage text calls this a path to a file with
//            ids, but no file is read here — confirm the intended meaning.
//   -s ARG   server name to connect to (conf.server).
//   -l ARG   polling period in milliseconds (conf.rep_time); must be > 0.
//   -h, -?   print usage and exit.
//
// Without -f, all remaining positional arguments are joined with ',' and
// used as the identifier string. Any invalid combination prints the usage
// text and exits.
void parse_args(int argc, char* argv[])
{
  int arg;
  while((arg = getopt(argc, argv, "?hf:s:l:")) != -1)
  {
    switch(arg)
    {
      case 'f':
        conf.ids = optarg;
        break;
      case 's':
        conf.server = std::string(optarg);
        break;
      case 'l':
      {
        // atoi yields 0 for non-numeric input; a non-positive period would
        // make the main loop poll the server without any pause.
        const int millis = atoi(optarg);
        if(millis <= 0) print_help_and_exit();
        conf.rep_time = duration<int, std::milli>(millis);
        // This break was missing: -l used to fall through into the help
        // branch and terminate the program.
        break;
      }
      case '?':
      case 'h':
      default:
        print_help_and_exit();
    }
  }

  if(!conf.ids)
  {
    if(optind >= argc) print_help_and_exit();
    std::stringstream ss;
    ss << argv[optind++];
    while(optind < argc)
    {
      ss << "," << argv[optind++];
    }
    // The copy is deliberately never freed: conf.ids is used until the
    // process ends.
    conf.ids = strdup(ss.str().c_str());
  }
  // With -f given, stray positional arguments are an error.
  if(optind!=argc) print_help_and_exit();
}

// Entry point: parse the command line, connect to the server, then query
// and print the job statuses forever, once per conf.rep_time.
int main(int argc, char* argv[]) {
  parse_args(argc, argv);
  connect();

  while(true)
  {
    const auto started = system_clock::now();
    jobs_status();
    const auto after = system_clock::now();
    const auto elapsed = duration_cast<milliseconds>(after - started);

    cout << date::format("[%D %T]: ", after);
    cout << "job status took " << elapsed.count() << " ms" << endl;

    // Sleep for whatever is left of the polling period. The previous
    // condition (`elapsed > 0s`) skipped the sleep whenever a query took
    // less than a millisecond, turning the loop into a busy loop.
    const milliseconds period = conf.rep_time;
    if(elapsed < period)
    {
      std::this_thread::sleep_for(period - elapsed);
    }
  }
}

CMakeLists.txt

cmake_minimum_required(VERSION 3.20...3.28)

project(
  QStatPoC
  VERSION 0.1
  LANGUAGES CXX)

# Directory that holds the PBS client libraries.
set(PBS_PATH /opt/pbs/lib/)

# REQUIRED makes a missing library fail right here with a clear message
# instead of later, when the NOTFOUND value reaches target_link_libraries.
find_library(pbs_lib pbs PATHS ${PBS_PATH} REQUIRED)
find_library(pbsdb_lib pbsdb PATHS ${PBS_PATH} REQUIRED)
#find_library(pbsjson_lib pbsjson PATHS ${PBS_PATH})
find_library(auth_munge_lib auth_munge PATHS ${PBS_PATH} REQUIRED)
find_library(licensing licensing PATHS ${PBS_PATH} REQUIRED)

add_executable(qstat_poc main.cpp)
# CXX_STANDARD alone is only a preference; CXX_STANDARD_REQUIRED stops CMake
# from silently falling back to an older standard.
set_target_properties(qstat_poc PROPERTIES
  CXX_STANDARD 17
  CXX_STANDARD_REQUIRED ON)

target_include_directories(qstat_poc PRIVATE /opt/pbs/include/)

# An executable has no consumers, so everything is linked PRIVATE.
target_link_libraries(qstat_poc PRIVATE
  ${pbs_lib} ${pbsdb_lib} ${auth_munge_lib} ${licensing})

JHolba avatar May 06 '24 08:05 JHolba