starting gpu with thrust

This commit is contained in:
2019-12-10 02:04:36 -05:00
parent 64ee4e53f4
commit eea3c2d414
7 changed files with 326 additions and 0 deletions

View File

@@ -5,3 +5,5 @@ add_subdirectory(common)
add_subdirectory(cpu-slo)
add_subdirectory(cpu-opt)
add_subdirectory(gpu-slo)

6
gpu-slo/CMakeLists.txt Normal file
View File

@@ -0,0 +1,6 @@
# Build script for the gpu-slo executable.
# Refuse to configure with a CMake older than 3.8.
cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
# The project is named "main" and enables both the C++ and CUDA languages.
project(main LANGUAGES CXX CUDA)
# Single executable target built from main.cu.
add_executable(main main.cu)
# Request C++11 for the target (PUBLIC: also for anything linking against it).
target_compile_features(main PUBLIC cxx_std_11)
# Turn on CMake's CUDA separable-compilation property for the target.
set_target_properties(main PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

8
gpu-slo/Makefile Normal file
View File

@@ -0,0 +1,8 @@
# Standalone build of the gpu-slo executable with nvcc.
#
#   make        build ./main from main.cu (rebuilt when main.cu or util.h change)
#   make run    build if necessary, then execute ./main
#   make clean  remove the built binary

# clean and run do not produce files with those names; declaring them phony
# keeps a stray file called "clean" or "run" from disabling the target.
.PHONY : clean run

main : main.cu util.h
	nvcc -o main -std=c++11 main.cu

# -f so that cleaning an already-clean tree is not an error.
clean :
	rm -f main

run : main
	./main

BIN
gpu-slo/main Executable file

Binary file not shown.

178
gpu-slo/main.cu Normal file
View File

@@ -0,0 +1,178 @@
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/for_each.h>
#include <vector>
#include <iostream>
#include "util.h"
// struct Cosets {
// int width;
// thrust::device_vector<int> data{};
//
// __host__
// Cosets(int ngens) : width(ngens) {
// }
//
// void add_row() {
// data.resize(data.size() + width, -1);
// }
//
// thrust::host_vector<int> get_data() {
// return data;
// }
// };
// One row of work for a single relation.
// `l` and `r` are positions inside the relation's word; `from` and `to` are
// the coset indices reached so far when walking from the left and from the
// right respectively (see Solver::operator()).
struct Row {
    int rel;       // index into the relation array
    int l, r;      // left / right position still to be scanned
    int from, to;  // coset reached from the left / from the right

    // Zeroed row; callable on host and device.
    __host__ __device__
    Row() {
        rel = 0;
        l = 0;
        r = 0;
        from = 0;
        to = 0;
    }

    // Fresh row for relation `rel`, with both ends starting at coset `cos`
    // and positions spanning 0 .. size - 1. Device-only.
    __device__
    Row(int rel, int cos, int size)
        : rel(rel), l(0), r(size - 1), from(cos), to(cos) {}
};
// Streams a Row as "Row[rel]{l:from-to:r}" and returns the stream.
std::ostream &operator<<(std::ostream &o, const Row &r) {
    o << "Row[" << r.rel << "]{";
    o << r.l << ":" << r.from;
    o << "-";
    o << r.to << ":" << r.r;
    o << "}";
    return o;
}
// A relation between two generators. Kept as a plain aggregate so it can be
// brace-initialised as {gen_a, gen_b, mul}.
struct Rel {
// The two generator indices; Solver picks gens[pos & 1] for word position pos.
int gens[2];
// Multiplicity; RowGen sizes each row as mul * 2.
int mul;
};
// Functor applied to every Row (see thrust::for_each in solve()).
// Holds raw device pointers into the coset table and the relation array; the
// device_vectors passed to the constructor must outlive every use of the
// functor.
//
// The coset table is indexed as cosets[coset * ngens + gen]; a negative
// entry is treated as "not yet known".
struct Solver {
    int ngens;
    int *cosets;
    Rel *rels;

    // Captures the raw device pointers behind `cosets` and `rels`.
    Solver(int ngens,
           thrust::device_vector<int> &cosets,
           thrust::device_vector<Rel> &rels)
        : ngens(ngens),
          cosets(thrust::raw_pointer_cast(cosets.data())),
          rels(thrust::raw_pointer_cast(rels.data())) {
    }

    // Scans the row inwards from both ends as far as the table allows and,
    // when the two ends meet, writes the deduced pair of table entries.
    //
    // NOTE(review): rows left with r == l + 1 (or less) are skipped outright
    // by the first check, so such a row never progresses on a later call --
    // confirm this is intended.
    // NOTE(review): all rows read and write the same `cosets` table with no
    // atomics or synchronisation in this functor -- confirm that is safe for
    // the way it is launched.
    __device__
    void operator()(Row &r) {
        if (r.l + 1 >= r.r) {
            return;
        }

        // The relation's generator pair; position parity selects the entry.
        const int *pair = rels[r.rel].gens;

        // Walk forward from the left end while the table has an entry.
        while ((r.r - r.l) > 0) {
            const int target = cosets[r.from * ngens + pair[r.l & 1]];
            if (target < 0) {
                break;
            }
            ++r.l;
            r.from = target;
        }

        // Walk backward from the right end while the table has an entry.
        while ((r.r - r.l) > 0) {
            const int target = cosets[r.to * ngens + pair[r.r & 1]];
            if (target < 0) {
                break;
            }
            --r.r;
            r.to = target;
        }

        // Both ends met: record from -> to and to -> from for that generator.
        if (r.r == r.l) {
            const int g = pair[r.l & 1];
            cosets[r.from * ngens + g] = r.to;
            cosets[r.to * ngens + g] = r.from;
        }
    }
};
// Functor that writes 0 into the coset table at the given generator index,
// i.e. cosets[gen] = 0. Stores a raw device pointer into `cosets`, so the
// vector must outlive the functor's use.
struct CosetInitializer {
    int *cosets;

    // Captures the raw device pointer behind `cosets`.
    CosetInitializer(thrust::device_vector<int> &cosets)
        : cosets(thrust::raw_pointer_cast(cosets.data())) {}

    // No bounds check: `gen` must be a valid index into the table.
    __device__
    void operator()(int gen) {
        int *slot = cosets + gen;
        *slot = 0;
    }
};
// Functor mapping a relation index to a fresh Row that starts at a fixed
// coset. Stores a raw device pointer into `rels`, so the vector must outlive
// the functor's use.
struct RowGen {
    Rel *rels;
    int coset;

    // Initialisers are listed in member declaration order.
    RowGen(int coset, thrust::device_vector<Rel> &rels)
        : rels(thrust::raw_pointer_cast(rels.data())),
          coset(coset) {}

    // The row's size is twice the relation's multiplicity.
    __device__
    Row operator()(int rel) {
        const int length = rels[rel].mul * 2;
        return Row(rel, coset, length);
    }
};
// Builds a one-row coset table (ngens entries, all -1), marks the entries
// listed in `subs` with 0, creates one Row per relation starting at coset 0,
// runs a single Solver pass over those rows and returns the table.
// The rows are printed before and after the pass for debugging.
thrust::device_vector<int> solve(
        int ngens,
        thrust::device_vector<int> subs,
        thrust::device_vector<Rel> rels) {
    // Table for the first coset only; -1 means "unknown".
    thrust::device_vector<int> cosets(ngens, -1);
    const int startCoset = 0;

    thrust::for_each(subs.begin(), subs.end(), CosetInitializer(cosets));

    // One row per relation index 0 .. rels.size() - 1.
    thrust::device_vector<Row> rows(rels.size());
    thrust::counting_iterator<int> firstRel(0);
    thrust::transform(firstRel, firstRel + rels.size(), rows.begin(),
                      RowGen(startCoset, rels));

    std::cout << rows << std::endl;

    Solver solv(ngens, cosets, rels);

    // Copy to the host so the individual rows can be printed.
    std::cout << thrust::host_vector<Row>(rows) << std::endl;
    thrust::for_each(rows.begin(), rows.end(), solv);
    std::cout << thrust::host_vector<Row>(rows) << std::endl;

    return cosets;
}
int main(int argc, char* argv[]) {
int ngens = 4;
std::vector<Rel> rels = {
{0, 1, 4},
{1, 2, 3},
{2, 3, 3},
{0, 2, 2},
{1, 2, 2},
{1, 3, 2},
};
std::vector<int> subs = {1, 3};
thrust::host_vector<int> cosets = solve(ngens, subs, rels);
std::cout << cosets << std::endl;
return 0;
}

104
gpu-slo/tests.cu Normal file
View File

@@ -0,0 +1,104 @@
//#include <cstdio>
//#include <cstdlib>
//
//#include <thrust/host_vector.h>
//#include <thrust/device_vector.h>
//#include <thrust/sequence.h>
//
//#define N 50
//
//__global__
//void vector_add(float* out, float* a, float* b, int n) {
// for(int i = 0; i < n; i++){
// out[i] = a[i] + b[i];
// }
//}
//
//int main(){
// thrust::host_vector<float> a(N);
// thrust::sequence(a.begin(), a.end());
//
// thrust::host_vector<float> b(N);
// thrust::sequence(b.begin(), b.end());
// thrust::reverse(b.begin(), b.end());
//
// for (int i = 0; i < N; ++i) {
// printf("%.1f ", a[i]);
// } printf("\n");
//
// for (int i = 0; i < N; ++i) {
// printf("%.1f ", b[i]);
// } printf("\n");
//
// thrust::device_vector<float> aD = a;
// thrust::device_vector<float> bD = b;
// thrust::device_vector<float> outD(N);
//
// vector_add<<<1, 1>>>(
// thrust::raw_pointer_cast(&outD[0]),
// thrust::raw_pointer_cast(&aD[0]),
// thrust::raw_pointer_cast(&bD[0]),
// N);
//
// thrust::host_vector<float> out = outD;
//
// for (int i = 0; i < N; ++i) {
// printf("%.1f ", out[i]);
// } printf("\n");
//
// return 0;
//}
#include <cstdio>
#include <cstdlib>
#include <chrono>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/sequence.h>
// Host-side reference addition: stores *a + *b into *c.
// Both operands are read before the store, so `c` may alias `a` or `b`.
void add_proc(int *c, int *a, int *b) {
    const int lhs = *a;
    const int rhs = *b;
    *c = lhs + rhs;
}
// Times one million in-place additions on the host through add_proc and
// prints the final value together with the elapsed time in microseconds.
void test_proc(){
    const int iterations = 1000000;
    int a = 0;
    int b = 1;

    // steady_clock is monotonic; system_clock can jump if the wall clock is
    // adjusted during the measurement.
    auto start = std::chrono::steady_clock::now();
    for (int i = 0; i < iterations; ++i) {
        add_proc(&a, &a, &b);
    }
    auto end = std::chrono::steady_clock::now();

    std::chrono::duration<float, std::micro> diff = end - start;
    // Label matches the actual iteration count (1,000,000).
    printf("proc: %d: 1M in %.3f micro\n", a, diff.count());
}
// Kernel: stores *a + *b into *c.
// There is no thread indexing, so every launched thread performs the same
// single addition; the caller in this file launches it as <<<1, 1>>>.
// All three pointers must be dereferenceable from device code.
__global__
void add_gpu(int *c, int *a, int *b) {
    const int lhs = *a;
    const int rhs = *b;
    *c = lhs + rhs;
}
void test_gpu(){
thrust::device_vector<int> vals(2, 0);
vals[0] = 0;
vals[1] = 1;
printf(" gpu: %d: 1B in %.3f micro\n", vals[0], 0.0f);
int *a = thrust::raw_pointer_cast(&vals[0]);
int *b = thrust::raw_pointer_cast(&vals[1]);
add_gpu<<<1, 1>>>(a, a, a);
printf(" gpu: %d: 1B in %.3f micro\n", vals[0], 0.0f);
}
// Runs the host benchmark followed by the GPU smoke test.
// Command-line arguments are ignored.
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    test_proc();
    test_gpu();
    return 0;
}

28
gpu-slo/util.h Normal file
View File

@@ -0,0 +1,28 @@
#pragma once
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <iostream>
// Streams a host_vector.
// Empty vectors and vectors with more than 15 elements are summarised as
// "host_vector{size=N}"; otherwise the elements are written as "[a, b, c]".
template<class T>
std::ostream &operator<<(std::ostream &o, const thrust::host_vector<T> &vec) {
    const std::size_t count = vec.size();
    if (count == 0 || count > 15) {
        o << "host_vector{size=" << count << "}";
        return o;
    }

    o << "[";
    for (std::size_t idx = 0; idx < count; ++idx) {
        // Separator goes before every element except the first.
        if (idx != 0) {
            o << ", ";
        }
        o << vec[idx];
    }
    o << "]";
    return o;
}
// Streams only a size summary for a device_vector ("device_vector{size=N}");
// the elements themselves are never read.
template<class T>
std::ostream &operator<<(std::ostream &o, const thrust::device_vector<T> &vec) {
    o << "device_vector{size=" << vec.size() << "}";
    return o;
}