add optimizations

This commit is contained in:
2019-12-10 20:38:23 -05:00
parent 92381db8ac
commit 2e24bb6bb2
3 changed files with 28 additions and 28 deletions

View File

@@ -1,5 +1,5 @@
main : main.cu util.h main : main.cu util.h
nvcc -o main -std=c++11 main.cu nvcc -o main -std=c++11 -O3 main.cu
clean : clean :
rm main rm main

View File

@@ -9,6 +9,8 @@
#include "util.h" #include "util.h"
#include "groups.h" #include "groups.h"
__constant__ Rel rels[128];
struct Row { struct Row {
int rel; int rel;
@@ -38,15 +40,12 @@ std::ostream &operator<<(std::ostream &o, const Row &r) {
struct Solver { struct Solver {
int ngens; int ngens;
int *cosets; int *cosets;
Rel *rels;
Solver(int ngens, Solver(int ngens,
thrust::device_vector<int> &cosets, thrust::device_vector<int> &cosets)
thrust::device_vector<Rel> &rels)
: ngens(ngens), : ngens(ngens),
cosets(thrust::raw_pointer_cast(cosets.data())), cosets(thrust::raw_pointer_cast(cosets.data())) {
rels(thrust::raw_pointer_cast(rels.data())) { }
}
__device__ __device__
void operator()(Row &r) { void operator()(Row &r) {
@@ -95,13 +94,11 @@ struct CosetInitializer {
// this creates rows for cosets by index of each relation table // this creates rows for cosets by index of each relation table
struct RowGen { struct RowGen {
Rel *rels;
int coset; int coset;
RowGen(int coset, thrust::device_vector<Rel> &rels) RowGen(int coset)
: coset(coset), : coset(coset) {
rels(thrust::raw_pointer_cast(rels.data())) {} }
__device__ __device__
Row operator()(int rel) { Row operator()(int rel) {
@@ -130,7 +127,8 @@ bool add_coset(
int ngens, int ngens,
int *coset, int *coset,
int *hint, int *hint,
thrust::device_vector<int> &cosets) { thrust::device_vector<int> &dcosets) {
thrust::host_vector<int> cosets = dcosets;
*coset = cosets.size() / ngens; *coset = cosets.size() / ngens;
// todo: this part especially. // todo: this part especially.
@@ -142,10 +140,10 @@ bool add_coset(
int from = *hint / ngens; int from = *hint / ngens;
int gen = *hint % ngens; int gen = *hint % ngens;
add_row(ngens, cosets); add_row(ngens, dcosets);
cosets[*hint] = *coset; dcosets[*hint] = *coset;
cosets[*coset * ngens + gen] = from; dcosets[*coset * ngens + gen] = from;
return false; return false;
} }
@@ -153,23 +151,23 @@ bool add_coset(
// add a row for each relation table for some coset // add a row for each relation table for some coset
void gen_rows( void gen_rows(
int coset, int coset,
thrust::device_vector<Rel> &rels, int nrels,
thrust::device_vector<Row> &rows) { thrust::device_vector<Row> &rows) {
rows.resize(rows.size() + rels.size()); rows.resize(rows.size() + nrels);
thrust::counting_iterator<int> counter(0); thrust::counting_iterator<int> counter(0);
thrust::transform( thrust::transform(
thrust::device, thrust::device,
counter, counter + rels.size(), counter, counter + nrels,
rows.end() - rels.size(), rows.end() - nrels,
RowGen(coset, rels)); RowGen(coset));
} }
// do everything. data is implicitly passed to the device via device_vector. // do everything. data is implicitly passed to the device via device_vector.
thrust::device_vector<int> solve( thrust::device_vector<int> solve(
int ngens, int ngens,
thrust::device_vector<int> subs, int nrels,
thrust::device_vector<Rel> rels) { thrust::device_vector<int> subs) {
thrust::device_vector<int> cosets; thrust::device_vector<int> cosets;
thrust::device_vector<Row> rows; thrust::device_vector<Row> rows;
@@ -182,7 +180,7 @@ thrust::device_vector<int> solve(
CosetInitializer(cosets)); CosetInitializer(cosets));
// generate initial relation table rows for coset 0 // generate initial relation table rows for coset 0
gen_rows(0, rels, rows); gen_rows(0, nrels, rows);
// these keep track of what progress has been made // these keep track of what progress has been made
int coset = 0; int coset = 0;
@@ -191,7 +189,7 @@ thrust::device_vector<int> solve(
// will break out later // will break out later
while (true) { while (true) {
// create a solver and apply it until nothing is being learned // create a solver and apply it until nothing is being learned
Solver solve(ngens, cosets, rels); Solver solve(ngens, cosets);
thrust::for_each( thrust::for_each(
thrust::device, thrust::device,
rows.begin(), rows.end(), rows.begin(), rows.end(),
@@ -205,7 +203,7 @@ thrust::device_vector<int> solve(
if (done) break; if (done) break;
// generate relation table rows for new coset // generate relation table rows for new coset
gen_rows(coset, rels, rows); gen_rows(coset, nrels, rows);
// move completed rows to the end of the list and remove. // move completed rows to the end of the list and remove.
auto cut = thrust::partition( auto cut = thrust::partition(
@@ -224,8 +222,10 @@ int main(int argc, const char* argv[]) {
cox = proc_args(argc, argv); cox = proc_args(argc, argv);
std::vector<int> subs = {}; std::vector<int> subs = {};
cudaMemcpyToSymbol(rels, cox.rels.data(), cox.rels.size() * sizeof(Rel));
auto s = std::chrono::system_clock::now(); auto s = std::chrono::system_clock::now();
thrust::host_vector<int> cosets = solve(cox.ngens, subs, cox.rels); thrust::host_vector<int> cosets = solve(cox.ngens, cox.rels.size(), subs);
auto e = std::chrono::system_clock::now(); auto e = std::chrono::system_clock::now();
std::chrono::duration<float> diff = e - s; std::chrono::duration<float> diff = e - s;

View File

@@ -1,5 +1,5 @@
main : main.cu util.h main : main.cu util.h
nvcc -o main -std=c++11 main.cu nvcc -o main -std=c++11 -O3 main.cu
clean : clean :
rm main rm main