parlaylib icon indicating copy to clipboard operation
parlaylib copied to clipboard

Long compile time

Open WhateverLiu opened this issue 8 months ago • 3 comments

Is there a way to reduce compile times when using Parlay? I know it is a template-heavy library and that there may be no simple solution, but I'm curious whether there is anything I might be missing. In larger projects, the long compile times can make iteration quite painful. For instance, even a simple code snippet of fewer than 150 lines takes 25 seconds to compile:

#include <atomic>
#include <iostream>
// #include "../code/cpp/infra/infra.hpp"
#include "parlay/primitives.hpp"
// Shorthand: vec<T> expands to parlay::sequence<T>. The macro is removed
// again by the `#undef vec` that follows main.
#define vec parlay::sequence

// Returns 0.5 * (p * log2(2p / (p + q)) + q * log2(2q / (p + q))).
// An argument that equals zero contributes nothing to the sum, so the
// logarithm is never evaluated for it.
//
// When `checked` is false (the default) both arguments are clamped first:
// any value that is not greater than zero is replaced by 0.0. Pass
// `checked = true` to skip the clamp.
template <bool checked = false>
double computeJS (double p, double q) {
  if constexpr (!checked) {
    // Written out by hand; selects exactly what std::max(0.0, x) selects.
    p = (0.0 < p) ? p : 0.0;
    q = (0.0 < q) ? q : 0.0;
  }

  const double scale = 2.0 / (p + q);

  double termP = 0;
  if (p != 0) termP = p * std::log2(scale * p);

  double termQ = 0;
  if (q != 0) termQ = q * std::log2(scale * q);

  return (termP + termQ) * 0.5;
}

// One histogram bin: four values of type Float named p, dp, js and q.
template <typename Float = double>
struct Bin {
  Float p, dp, js, q;

  // Default bin: every field is zero.
  Bin() : p(0), dp(0), js(0), q(0) {}

  // Stores p_, dp_ and q_ and derives js by calling computeJS on p_ and q_.
  // The initialisers are listed in declaration order (p, dp, js, q), which is
  // why js is computed from the constructor arguments rather than the members.
  Bin(Float p_, Float dp_, Float q_)
    : p(p_), dp(dp_), js(computeJS(p_, q_)), q(q_) {}
};

template <typename Float = double>
auto makeHists(auto && zbYear, auto && zbDim, 
               auto && gbid, 
               //auto && freq, 
               auto && subcandYears, // A vector of vectors
               int NsubYear,
               auto && targetYears,
               auto && dimw)
{ 
  auto inv_w = 1.0 / std::accumulate(dimw.begin(), dimw.end(), 0.0);
  for (auto && u: dimw) u *= inv_w;
  auto nyear = *parlay::max_element(zbYear) + 1;
  auto ndim = *parlay::max_element(zbDim) + 1;
  auto ngbid = *parlay::max_element(gbid) + 1;
  auto dimTotalFreq = parlay::reduce_by_index(parlay::depzip(
    parlay::slice(zbDim.begin(), zbDim.end()),
    parlay::delayed_tabulate(zbDim.size(), [](auto i)->unsigned { return 1; })),
    ndim);
  
  
  double targetScaleRatio = nyear / double(targetYears.size());
  double subScaleRatio =  nyear / double(NsubYear);
  auto rst = parlay::tabulate(subcandYears.size(), [ngbid](auto i)->auto { 
    return vec<Bin<Float>> (ngbid); });
  auto & rst0 = rst.front();
  
  
  auto whichIn = [](auto && x, auto && y, unsigned n)->auto {
    vec<bool> indi(n, false);
    parlay::for_each(y, [&](auto && u)->void { indi[u] = true; });
    return parlay::filter(parlay::iota(unsigned(x.size())), 
                   [&](auto i)->bool {return indi[x[i]]; });
  };
  auto targetYearsInd = whichIn(zbYear, targetYears, nyear);
  
  
  parlay::for_each(targetYearsInd, [&](auto i)->void {
    auto & b = rst0[gbid[i]];
    auto tfreq = dimTotalFreq[zbDim[i]];
    auto q = 1 * targetScaleRatio / tfreq * dimw[zbDim[i]];
    auto ptr = (std::atomic<Float>*)(&b.q);
    ptr->fetch_add(q);
  });
  
  auto subYearsInd = whichIn (zbYear, parlay::slice(
    subcandYears.front().begin(), subcandYears.front().end()), nyear);
  
  parlay::for_each(subYearsInd, [&](auto i)->void {
    auto & b = rst0[gbid[i]];
    auto ptr = (std::atomic<Float>*)(&b.p);
    auto tfreq = dimTotalFreq[zbDim[i]];
    auto p = 1 * subScaleRatio / tfreq * dimw[zbDim[i]];
    ptr->fetch_add(p);  
  }); 
  parlay::parFor(0, zbYear.size(), [&](size_t i)->void {
    auto & b = rst0[gbid[i]];
    auto tfreq = dimTotalFreq[zbDim[i]];
    b.dp = 1 * subScaleRatio / tfreq * dimw[zbDim[i]];
  }); 
  
  
  parlay::parFor(1, rst.size(), [&](int t)->void {
    auto & v = rst[t];
    auto subcat = parlay::slice(
      subcandYears[t].begin(), subcandYears[t].begin() + NsubYear);
    vec<bool> indi(nyear, false);
    for (auto && u: subcat) indi[u] = true;
    for (size_t i = 0, iend = zbYear.size(); i < iend; ++i) {
      if (!indi[zbYear[i]]) continue;
      auto & b = v[gbid[i]];
      auto tfreq = dimTotalFreq[zbDim[i]];
      b.p += 1 * subScaleRatio / tfreq * dimw[zbDim[i]];
    } 
    for (size_t i = 0, iend = rst0.size(); i < iend; ++i) {
      v[i].q = rst0[i].q;
      v[i].dp = rst0[i].dp;
    } 
  });
  
  
  parlay::for_each(rst, [&](auto & x)->void {
    for (auto & u: x) u.js = computeJS (u.p, u.q);
  }); 
  
  
  auto yearImpact = parlay::group_by_index(
    parlay::depzip(parlay::slice(zbYear.begin(), zbYear.end()), 
                  parlay::slice( gbid.begin(), gbid.end())), nyear); 
  return std::pair(std::move(yearImpact), std::move(rst));
}  

// Driver for the reproduction: calls makeHists with lazily generated inputs
// and prints rst.first[0][0] and the size of rst.second[0].
//
// Each argument keeps its own lambda expression (rather than sharing one) so
// that the set of distinct closure types handed to makeHists is unchanged.
//
// NOTE(review): NsubYear is 10000 while each candidate is requested with 1000
// elements from detab, and makeHists slices `begin() + NsubYear` from each
// candidate -- presumably this program is only meant to be compiled, not run;
// confirm before executing it.
int main() {
  unsigned n = 1'000'000;
  // Argument order: zbYear, zbDim, gbid, subcandYears, NsubYear,
  // targetYears, dimw.
  auto rst = makeHists(
    parlay::delayed_tabulate(n, [n](auto i)->auto {return unsigned(rand() % n);}),
    parlay::delayed_tabulate(n, [n](auto i)->auto {return unsigned(rand() % n);}),
    parlay::delayed_tabulate(n, [n](auto i)->auto {return unsigned(rand() % n);}),
    parlay::delayed_tabulate(n, [](auto i)->auto {
      return parlay::detab(1000, [](auto j)->auto {
        return unsigned(rand() % 100);});
    }), 10000,
    parlay::delayed_tabulate(n, [n](auto i)->auto {return unsigned(rand() % n);}),
    parlay::delayed_tabulate(n, [](auto i)->auto { return 0.1; }));
  std::cout << rst.first[0][0] << ", " << rst.second[0].size() << "\n";
}

// Remove the vec shorthand that was defined before computeJS.
#undef vec

The compile command is `g++ -std=c++20 tempFiles/longcomptime.cpp -ftree-vectorize -march=native -O2 -pthread -o tempFiles/longcomptime`.

Any suggestions would be greatly appreciated!

WhateverLiu avatar Apr 11 '25 00:04 WhateverLiu