starting gpu with thrust
This commit is contained in:
@@ -5,3 +5,5 @@ add_subdirectory(common)
|
||||
|
||||
add_subdirectory(cpu-slo)
|
||||
add_subdirectory(cpu-opt)
|
||||
|
||||
add_subdirectory(gpu-slo)
|
||||
|
||||
6
gpu-slo/CMakeLists.txt
Normal file
6
gpu-slo/CMakeLists.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
# Build script for the gpu-slo executable (a single CUDA translation unit).
cmake_minimum_required(
    VERSION 3.8
    FATAL_ERROR
)

# Both the C++ and CUDA languages are enabled for this project.
project(
    main
    LANGUAGES CXX CUDA
)

# One executable, built from main.cu.
add_executable(
    main
    main.cu
)

# Request C++11 for the target and anything that links against it.
target_compile_features(
    main
    PUBLIC cxx_std_11
)

# Turn on separable compilation for the CUDA sources of this target.
set_target_properties(
    main
    PROPERTIES CUDA_SEPARABLE_COMPILATION ON
)
|
||||
8
gpu-slo/Makefile
Normal file
8
gpu-slo/Makefile
Normal file
@@ -0,0 +1,8 @@
|
||||
# Builds the gpu-slo binary directly with nvcc.

# `clean` and `run` name actions, not files; declaring them phony keeps a
# stray file called "clean" or "run" from silently disabling the target.
.PHONY : clean run

# Rebuild whenever the source or the shared header changes.
main : main.cu util.h
	nvcc -o main -std=c++11 main.cu

# -f: do not fail when the binary has not been built yet.
clean :
	rm -f main

# Build (if needed) and execute the binary.
run : main
	./main
|
||||
BIN
gpu-slo/main
Executable file
BIN
gpu-slo/main
Executable file
Binary file not shown.
178
gpu-slo/main.cu
Normal file
178
gpu-slo/main.cu
Normal file
@@ -0,0 +1,178 @@
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/host_vector.h>
|
||||
#include <thrust/for_each.h>
|
||||
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
|
||||
#include "util.h"
|
||||
|
||||
// struct Cosets {
|
||||
// int width;
|
||||
// thrust::device_vector<int> data{};
|
||||
//
|
||||
// __host__
|
||||
// Cosets(int ngens) : width(ngens) {
|
||||
// }
|
||||
//
|
||||
// void add_row() {
|
||||
// data.resize(data.size() + width, -1);
|
||||
// }
|
||||
//
|
||||
// thrust::host_vector<int> get_data() {
|
||||
// return data;
|
||||
// }
|
||||
// };
|
||||
|
||||
struct Row {
|
||||
int rel;
|
||||
|
||||
int l, r;
|
||||
|
||||
int from, to;
|
||||
|
||||
__host__ __device__
|
||||
Row() : rel(0), l(0), r(0), from(0), to(0) {}
|
||||
|
||||
__device__
|
||||
Row(int rel, int cos, int size) {
|
||||
l = 0;
|
||||
r = size - 1;
|
||||
|
||||
from = to = cos;
|
||||
|
||||
this->rel = rel;
|
||||
}
|
||||
};
|
||||
|
||||
std::ostream &operator<<(std::ostream &o, const Row &r) {
|
||||
return o << "Row[" << r.rel << "]{" << r.l << ":" << r.from << "-" << r.to << ":" << r.r << "}";
|
||||
}
|
||||
|
||||
/// A relation between two generators.
///
/// Kept as a plain aggregate so it can be brace-initialised as
/// `{gen_a, gen_b, mul}`.
struct Rel {
    /// The two generator indices; Solver alternates between them using the
    /// low bit of a position.
    int gens[2];

    /// Multiplicity; RowGen sizes a row as `mul * 2`.
    int mul;
};
|
||||
|
||||
struct Solver {
|
||||
int ngens;
|
||||
int *cosets;
|
||||
Rel *rels;
|
||||
|
||||
Solver(int ngens,
|
||||
thrust::device_vector<int> &cosets,
|
||||
thrust::device_vector<Rel> &rels)
|
||||
: ngens(ngens),
|
||||
cosets(thrust::raw_pointer_cast(cosets.data())),
|
||||
rels(thrust::raw_pointer_cast(rels.data())) {
|
||||
}
|
||||
|
||||
__device__
|
||||
void operator()(Row &r) {
|
||||
if (r.l + 1 >= r.r) return;
|
||||
|
||||
while ((r.r - r.l) > 0) {
|
||||
int gen = rels[r.rel].gens[r.l & 1];
|
||||
int next = cosets[r.from * ngens + gen];
|
||||
if (next < 0) break;
|
||||
r.l++;
|
||||
r.from = next;
|
||||
}
|
||||
|
||||
while ((r.r - r.l) > 0) {
|
||||
int gen = rels[r.rel].gens[r.r & 1];
|
||||
int next = cosets[r.to * ngens + gen];
|
||||
if (next < 0) break;
|
||||
r.r--;
|
||||
r.to = next;
|
||||
}
|
||||
|
||||
if (r.r - r.l == 0) {
|
||||
int gen = rels[r.rel].gens[r.l & 1];
|
||||
cosets[r.from * ngens + gen] = r.to;
|
||||
cosets[r.to * ngens + gen] = r.from;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct CosetInitializer {
|
||||
int *cosets;
|
||||
|
||||
CosetInitializer(thrust::device_vector<int> &cosets)
|
||||
: cosets(thrust::raw_pointer_cast(cosets.data())) {
|
||||
}
|
||||
|
||||
__device__
|
||||
void operator()(int gen) {
|
||||
cosets[gen] = 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct RowGen {
|
||||
Rel *rels;
|
||||
|
||||
int coset;
|
||||
|
||||
RowGen(int coset, thrust::device_vector<Rel> &rels)
|
||||
: coset(coset),
|
||||
rels(thrust::raw_pointer_cast(rels.data())) {}
|
||||
|
||||
__device__
|
||||
Row operator()(int rel) {
|
||||
return Row(rel, coset, rels[rel].mul * 2);
|
||||
}
|
||||
};
|
||||
|
||||
/// Runs one pass of the solver and returns the coset table.
///
/// @param ngens number of generators; the table starts with `ngens` entries,
///              all -1.
/// @param subs  table indices that are set to 0 before solving.
/// @param rels  relations; one Row per relation is created for coset 0.
/// @return the device-side table after a single Solver pass over the rows.
///
/// Prints the row vector three times along the way (once as a device vector,
/// then as host copies before and after the pass).
thrust::device_vector<int> solve(
    int ngens,
    thrust::device_vector<int> subs,
    thrust::device_vector<Rel> rels) {

    // A single row of the table, every entry unknown (-1).
    thrust::device_vector<int> cosets(ngens, -1);
    int lastCoset = 0;

    CosetInitializer init(cosets);
    thrust::for_each(subs.begin(), subs.end(), init);

    thrust::device_vector<Row> rows;

    // Build one row per relation index 0 .. rels.size()-1, then append the
    // batch to the (still empty) row list.
    thrust::counting_iterator<int> first_rel(0);
    thrust::device_vector<Row> fresh(rels.size());
    thrust::transform(first_rel, first_rel + rels.size(), fresh.begin(),
                      RowGen(lastCoset, rels));
    rows.insert(rows.begin(), fresh.begin(), fresh.end());

    std::cout << rows << std::endl;

    Solver solv(ngens, cosets, rels);

    // Copy to the host for printing before and after the pass.
    thrust::host_vector<Row> before(rows);
    std::cout << before << std::endl;

    thrust::for_each(rows.begin(), rows.end(), solv);

    thrust::host_vector<Row> after(rows);
    std::cout << after << std::endl;

    return cosets;
}
|
||||
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
int ngens = 4;
|
||||
std::vector<Rel> rels = {
|
||||
{0, 1, 4},
|
||||
{1, 2, 3},
|
||||
{2, 3, 3},
|
||||
|
||||
{0, 2, 2},
|
||||
{1, 2, 2},
|
||||
{1, 3, 2},
|
||||
};
|
||||
std::vector<int> subs = {1, 3};
|
||||
|
||||
thrust::host_vector<int> cosets = solve(ngens, subs, rels);
|
||||
|
||||
std::cout << cosets << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
104
gpu-slo/tests.cu
Normal file
104
gpu-slo/tests.cu
Normal file
@@ -0,0 +1,104 @@
|
||||
//#include <cstdio>
|
||||
//#include <cstdlib>
|
||||
//
|
||||
//#include <thrust/host_vector.h>
|
||||
//#include <thrust/device_vector.h>
|
||||
//#include <thrust/sequence.h>
|
||||
//
|
||||
//#define N 50
|
||||
//
|
||||
//__global__
|
||||
//void vector_add(float* out, float* a, float* b, int n) {
|
||||
// for(int i = 0; i < n; i++){
|
||||
// out[i] = a[i] + b[i];
|
||||
// }
|
||||
//}
|
||||
//
|
||||
//int main(){
|
||||
// thrust::host_vector<float> a(N);
|
||||
// thrust::sequence(a.begin(), a.end());
|
||||
//
|
||||
// thrust::host_vector<float> b(N);
|
||||
// thrust::sequence(b.begin(), b.end());
|
||||
// thrust::reverse(b.begin(), b.end());
|
||||
//
|
||||
// for (int i = 0; i < N; ++i) {
|
||||
// printf("%.1f ", a[i]);
|
||||
// } printf("\n");
|
||||
//
|
||||
// for (int i = 0; i < N; ++i) {
|
||||
// printf("%.1f ", b[i]);
|
||||
// } printf("\n");
|
||||
//
|
||||
// thrust::device_vector<float> aD = a;
|
||||
// thrust::device_vector<float> bD = b;
|
||||
// thrust::device_vector<float> outD(N);
|
||||
//
|
||||
// vector_add<<<1, 1>>>(
|
||||
// thrust::raw_pointer_cast(&outD[0]),
|
||||
// thrust::raw_pointer_cast(&aD[0]),
|
||||
// thrust::raw_pointer_cast(&bD[0]),
|
||||
// N);
|
||||
//
|
||||
// thrust::host_vector<float> out = outD;
|
||||
//
|
||||
// for (int i = 0; i < N; ++i) {
|
||||
// printf("%.1f ", out[i]);
|
||||
// } printf("\n");
|
||||
//
|
||||
// return 0;
|
||||
//}
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <chrono>
|
||||
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/host_vector.h>
|
||||
#include <thrust/sequence.h>
|
||||
|
||||
/// Stores the sum of `*a` and `*b` in `*c`.
///
/// Both operands are read before the result is written, so `c` may alias
/// either input.
void add_proc(int *c, int *a, int *b) {
    const int sum = *a + *b;
    *c = sum;
}
|
||||
|
||||
void test_proc(){
|
||||
int a = 0;
|
||||
int b = 1;
|
||||
|
||||
auto start = std::chrono::system_clock::now();
|
||||
|
||||
for (int i = 0; i < 1000000; ++i) {
|
||||
add_proc(&a, &a, &b);
|
||||
}
|
||||
|
||||
auto end = std::chrono::system_clock::now();
|
||||
|
||||
std::chrono::duration<float, std::micro> diff = end - start;
|
||||
|
||||
printf("proc: %d: 1B in %.3f micro\n", a, diff.count());
|
||||
}
|
||||
|
||||
/// Kernel: stores the sum of `*a` and `*b` in `*c`.
///
/// Every launched thread performs the same read-add-write; both operands are
/// read before the result is written, so `c` may alias either input.
__global__
void add_gpu(int *c, int *a, int *b) {
    const int sum = *a + *b;
    *c = sum;
}
|
||||
|
||||
void test_gpu(){
|
||||
thrust::device_vector<int> vals(2, 0);
|
||||
vals[0] = 0;
|
||||
vals[1] = 1;
|
||||
printf(" gpu: %d: 1B in %.3f micro\n", vals[0], 0.0f);
|
||||
|
||||
int *a = thrust::raw_pointer_cast(&vals[0]);
|
||||
int *b = thrust::raw_pointer_cast(&vals[1]);
|
||||
|
||||
add_gpu<<<1, 1>>>(a, a, a);
|
||||
|
||||
printf(" gpu: %d: 1B in %.3f micro\n", vals[0], 0.0f);
|
||||
}
|
||||
|
||||
/// Entry point: runs the CPU check first, then the GPU check.
int main(int argc, char *argv[]) {
    test_proc();
    test_gpu();

    return 0;
}
|
||||
|
||||
28
gpu-slo/util.h
Normal file
28
gpu-slo/util.h
Normal file
@@ -0,0 +1,28 @@
|
||||
#pragma once
|
||||
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/host_vector.h>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
template<class T>
|
||||
std::ostream &operator<<(std::ostream &o, const thrust::host_vector<T> &vec) {
|
||||
if (vec.size() == 0 || vec.size() > 15)
|
||||
return o << "host_vector{size=" << vec.size() << "}";
|
||||
|
||||
o << "[";
|
||||
|
||||
for (int i = 0; i < vec.size() - 1; i++) o << vec[i] << ", ";
|
||||
|
||||
if (vec.size() > 0) o << vec[vec.size() - 1];
|
||||
|
||||
o << "]";
|
||||
|
||||
return o;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
std::ostream &operator<<(std::ostream &o, const thrust::device_vector<T> &vec) {
|
||||
return o << "device_vector{size=" << vec.size() << "}";
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user