1. Write a parallel program that computes the sum of all elements in a matrix. Use a binary tree for computing the sum. The program shall use a number of threads specified as input.
#include <iostream>
#include <fstream>
#include <vector>
#include <thread>
#include <mutex>
using namespace std;
int t, n, m;
vector<vector<int>> matrix;
int sum = 0;
mutex mtx;
void partial_matrix_sum(int row, int col, int size) {
if (size == 1) {
unique_lock<mutex> lck(mtx);
sum += matrix[row][col];
return;
}
int l_size = size / 2 + size % 2;
int l_row = row;
int l_col = col;
int r_size = size / 2;
int r_row = row + (l_size / m);
int r_col = col + (l_size % m);
if (r_col >= m) r_row++, r_col = r_col % m;
partial_matrix_sum(l_row, l_col, l_size);
partial_matrix_sum(r_row, r_col, r_size);
}
void parallel_matrix_sum(int row, int col, int size, int t) {
if (size == 1) {
unique_lock<mutex> lck(mtx);
sum += matrix[row][col];
return;
}
if (t == 0)
partial_matrix_sum(row, col, size);
else {
int l_size = size / 2 + size % 2;
int l_row = row;
int l_col = col;
int r_size = size / 2;
int r_row = row + (l_size / m);
int r_col = col + (l_size % m);
if (r_col >= m) r_row++, r_col = r_col % m;
if (t == 1) {
thread th(parallel_matrix_sum, l_row, l_col, l_size, 0);
partial_matrix_sum(r_row, r_col, r_size);
th.join();
} else {
thread l_th(parallel_matrix_sum, l_row, l_col, l_size, t / 2 + t % 2 - 1);
thread r_th(parallel_matrix_sum, r_row, r_col, r_size, t / 2 - 1);
l_th.join();
r_th.join();
}
}
}
int main() {
ifstream fin("input.in");
fin >> t;
fin >> n >> m;
matrix.resize(n, vector<int>(m));
for (int i = 0; i < n; ++i) {
for (int j = 0; j < m; ++j) {
fin >> matrix[i][j];
}
}
if (t == 0) partial_matrix_sum(0, 0, n * m);
else {
thread th(parallel_matrix_sum, 0, 0, n * m, t - 1);
th.join();
}
cout << sum;
}
2. Write a parallel program that computes the n-th power of a square matrix. Use a binary tree for computing the result. The program shall use a number of threads specified as input.
#include <iostream>
#include <fstream>
#include <vector>
#include <thread>
#include <mutex>
#include <cmath>
using namespace std;
mutex mtx;
vector<vector<int>> matrix_multiply(const vector<vector<int>> &a, const vector<vector<int>> &b) {
int n = a.size();
int m = b[0].size();
int p = b.size();
vector<vector<int>> result(n, vector<int>(m, 0));
for (int i = 0; i < n; i++) {
for (int j = 0; j < m; j++) {
for (int k = 0; k < p; k++) {
result[i][j] += a[i][k] * b[k][j];
}
}
}
return result;
}
vector<vector<int>> matrix_power_recursive(const vector<vector<int>> &matrix, int n) {
int size = matrix.size();
if (n == 0) {
vector<vector<int>> result(size, vector<int>(size, 0));
for (int i = 0; i < size; i++) {
result[i][i] = 1;
}
return result;
}
if (n % 2 == 0) {
vector<vector<int>> half_power = matrix_power_recursive(matrix, n / 2);
return matrix_multiply(half_power, half_power);
} else {
vector<vector<int>> half_power = matrix_power_recursive(matrix, (n - 1) / 2);
vector<vector<int>> temp = matrix_multiply(half_power, half_power);
return matrix_multiply(temp, matrix);
}
}
vector<vector<int>> matrix_power(const vector<vector<int>> &matrix, int n, int t) {
int size = matrix.size();
vector<vector<int>> result(size, vector<int>(size, 0));
if (t <= 0) {
result = matrix_power_recursive(matrix, n);
} else {
vector<thread> threads(t);
for (int i = 0; i < t; i++) {
threads[i] = thread([&result, &matrix, size, n, i, t]() {
for (int k = i; k < n; k += t) {
vector<vector<int>> temp_result = matrix_power_recursive(matrix, k + 1);
mtx.lock();
for (int j = 0; j < size; j++) {
for (int l = 0; l < size; l++) {
result[j][l] += temp_result[j][l];
}
}
mtx.unlock();
}
});
}
for (int i = 0; i < t; i++) {
threads[i].join();
}
}
return result;
}
int main() {
ifstream fin("input.in");
int t, n, exponent;
vector<vector<int>> matrix;
fin >> t;
fin >> n;
matrix.resize(n, vector<int>(n));
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
fin >> matrix[i][j];
}
}
fin >> exponent;
fin.close();
vector<vector<int>> result = matrix_power(matrix, exponent, t);
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
cout << result[i][j] << " ";
}
cout << endl;
}
return 0;
}
3. Write a parallel program that computes the scalar product of 2 vectors of the same length. Use a binary tree for computing the sum. The program shall use a number of threads specified as input.
#include <iostream>
#include <fstream>
#include <vector>
#include <thread>
#include <mutex>
using namespace std;
int t, n;
vector<int> a, b;
int prod = 0;
mutex mtx;
void compute_scalar_product(int start, int end, int &result) {
int partial_prod = 0;
for (int i = start; i < end; ++i) {
partial_prod += a[i] * b[i];
}
lock_guard<mutex> lock(mtx);
result = partial_prod;
}
void parallel_scalar_product(int start, int end, int &result, int thread_count) {
if (thread_count == 1) {
compute_scalar_product(start, end, result);
} else {
int mid = (start + end) / 2;
int left_result = 0;
int right_result = 0;
thread left_thread(parallel_scalar_product, start, mid, ref(left_result), thread_count / 2);
thread right_thread(parallel_scalar_product, mid, end, ref(right_result), thread_count / 2);
left_thread.join();
right_thread.join();
lock_guard<mutex> lock(mtx);
result = left_result + right_result;
}
}
int main() {
ifstream fin("input.in");
fin >> t;
fin >> n;
a.resize(n);
b.resize(n);
for (int i = 0; i < n; i++) {
fin >> a[i];
}
for (int i = 0; i < n; i++) {
fin >> b[i];
}
if (t == 0) {
compute_scalar_product(0, n, prod);
} else {
parallel_scalar_product(0, n, prod, t);
}
cout << prod << endl;
}
4. Write a parallel program that computes the product of 2 matrices. The program shall use a number of threads specified as input.
#include <iostream>
#include <fstream>
#include <vector>
#include <thread>
#include <mutex>
using namespace std;
mutex mtx;
vector<vector<int>> matrix_multiply(const vector<vector<int>> &a, const vector<vector<int>> &b) {
int n = a.size();
int m = b[0].size();
int p = b.size();
vector<vector<int>> result(n, vector<int>(m, 0));
for (int i = 0; i < n; i++) {
for (int j = 0; j < m; j++) {
for (int k = 0; k < p; k++) {
result[i][j] += a[i][k] * b[k][j];
}
}
}
return result;
}
vector<vector<int>> matrix_product_recursive(const vector<vector<int>> &matrixA, const vector<vector<int>> &matrixB) {
int size = matrixA.size();
if (size == 1) {
vector<vector<int>> result(1, vector<int>(1, 0));
result[0][0] = matrixA[0][0] * matrixB[0][0];
return result;
}
int half_size = size / 2;
vector<vector<int>> A11(half_size, vector<int>(half_size));
vector<vector<int>> A12(half_size, vector<int>(half_size));
vector<vector<int>> A21(half_size, vector<int>(half_size));
vector<vector<int>> A22(half_size, vector<int>(half_size));
vector<vector<int>> B11(half_size, vector<int>(half_size));
vector<vector<int>> B12(half_size, vector<int>(half_size));
vector<vector<int>> B21(half_size, vector<int>(half_size));
vector<vector<int>> B22(half_size, vector<int>(half_size));
for (int i = 0; i < half_size; i++) {
for (int j = 0; j < half_size; j++) {
A11[i][j] = matrixA[i][j];
A12[i][j] = matrixA[i][j + half_size];
A21[i][j] = matrixA[i + half_size][j];
A22[i][j] = matrixA[i + half_size][j + half_size];
B11[i][j] = matrixB[i][j];
B12[i][j] = matrixB[i][j + half_size];
B21[i][j] = matrixB[i + half_size][j];
B22[i][j] = matrixB[i + half_size][j + half_size];
}
}
vector<vector<int>> P1 = matrix_product_recursive(A11, B11);
vector<vector<int>> P2 = matrix_product_recursive(A12, B21);
vector<vector<int>> P3 = matrix_product_recursive(A11, B12);
vector<vector<int>> P4 = matrix_product_recursive(A12, B22);
vector<vector<int>> P5 = matrix_product_recursive(A21, B11);
vector<vector<int>> P6 = matrix_product_recursive(A22, B21);
vector<vector<int>> P7 = matrix_product_recursive(A21, B12);
vector<vector<int>> P8 = matrix_product_recursive(A22, B22);
vector<vector<int>> C11(half_size, vector<int>(half_size));
vector<vector<int>> C12(half_size, vector<int>(half_size));
vector<vector<int>> C21(half_size, vector<int>(half_size));
vector<vector<int>> C22(half_size, vector<int>(half_size));
for (int i = 0; i < half_size; i++) {
for (int j = 0; j < half_size; j++) {
C11[i][j] = P1[i][j] + P2[i][j];
C12[i][j] = P3[i][j] + P4[i][j];
C21[i][j] = P5[i][j] + P6[i][j];
C22[i][j] = P7[i][j] + P8[i][j];
}
}
vector<vector<int>> result(size, vector<int>(size));
for (int i = 0; i < half_size; i++) {
for (int j = 0; j < half_size; j++) {
result[i][j] = C11[i][j];
result[i][j + half_size] = C12[i][j];
result[i + half_size][j] = C21[i][j];
result[i + half_size][j + half_size] = C22[i][j];
}
}
return result;
}
vector<vector<int>> matrix_product(const vector<vector<int>> &matrixA, const vector<vector<int>> &matrixB, int t) {
int size = matrixA.size();
vector<vector<int>> result(size, vector<int>(size, 0));
if (t <= 0) {
result = matrix_multiply(matrixA, matrixB);
} else {
vector<thread> threads(t);
for (int i = 0; i < t; i++) {
threads[i] = thread([&result, &matrixA, &matrixB, size, i, t]() {
for (int k = i; k < size; k += t) {
for (int j = 0; j < size; j++) {
for (int l = 0; l < size; l++) {
lock_guard<mutex> lock(mtx);
result[k][j] += matrixA[k][l] * matrixB[l][j];
}
}
}
});
}
for (int i = 0; i < t; i++) {
threads[i].join();
}
}
return result;
}
int main() {
ifstream fin("input.in");
int t, n;
vector<vector<int>> matrixA, matrixB;
fin >> t;
fin >> n;
matrixA.resize(n, vector<int>(n));
matrixB.resize(n, vector<int>(n));
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
fin >> matrixA[i][j];
}
}
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
fin >> matrixB[i][j];
}
}
fin.close();
vector<vector<int>> result = matrix_product(matrixA, matrixB, t);
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
cout << result[i][j] << " ";
}
cout << endl;
}
return 0;
}
#include <iostream>
#include <fstream>
#include <vector>
#include <thread>
#include <mutex>
#include <queue>
#include <condition_variable>
using namespace std;
mutex mtx;
condition_variable cv;
void multiply_matrices(const vector<vector<int>>& a, const vector<vector<int>>& b, vector<vector<int>>& c, int start, int end) {
int n = a.size();
int m = a[0].size();
int p = b[0].size();
for (int i = start; i < end; ++i) {
for (int j = 0; j < n; ++j) {
for (int k = 0; k < m; ++k) {
c[i][j] += a[i][k] * b[k][j];
}
}
}
cv.notify_one();
}
vector<vector<int>> matrix_multiply(const vector<vector<int>> &a, const vector<vector<int>> &b, int t) {
int n = a.size();
vector<vector<int>> c(n, vector<int>(n, 0));
if (t <= 0) {
lock_guard<mutex> lock(mtx);
multiply_matrices(a, b, c, 0, n);
} else {
vector<thread> threads;
queue<int> task_queue;
for (int i = 0; i < t; ++i) {
int start = (i * n) / t;
int end = ((i + 1) * n) / t;
task_queue.push(start);
task_queue.push(end);
}
for (int i = 0; i < t; ++i) {
threads.push_back(thread([&a, &b, &c, &task_queue]() {
while (true) {
int start, end;
{
unique_lock<mutex> lock(mtx);
if (task_queue.empty()) {
break;
}
start = task_queue.front();
task_queue.pop();
end = task_queue.front();
task_queue.pop();
}
multiply_matrices(a, b, c, start, end);
}
}));
}
for (int i = 0; i < t; ++i) {
threads[i].join();
}
}
return c;
}
int main() {
ifstream fin("input.in");
int t;
fin >> t;
int n;
fin >> n;
vector<vector<int>> a(n, vector<int>(n));
vector<vector<int>> b(n, vector<int>(n));
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
fin >> a[i][j];
}
}
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
fin >> b[i][j];
}
}
fin.close();
vector<vector<int>> result = matrix_multiply(a, b, t);
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
cout << result[i][j] << ' ';
}
cout << '\\n';
}
return 0;
}
5. Write a parallel program that computes the product of two big numbers. The program shall use a number of threads specified as input.
#include <iostream>
#include <fstream>
#include <vector>
#include <thread>
#include <mutex>
using namespace std;
mutex mtx;
vector<int> solve(vector<int> a, vector<int> b, int t) {
int n = a.size();
int m = 2 * n - 1;
vector<int> c;
c.resize(m, 0);
if (t <= 0) {
unique_lock<mutex> lock(mtx);
for (int x = 0; x < m; x++) {
for (int i = 0; i < n; ++i) {
if (x - i >= n || x - i < 0) {
continue;
}
c[x] += a[i] * b[x - i];
}
}
} else {
vector<thread> thr;
for (int idx = 0; idx < t; ++idx) {
thr.push_back(thread([&, idx]() {
for (int x = idx; x < m; x += t) {
for (int i = 0; i < n; ++i) {
if (x - i >= n || x - i < 0) {
continue;
}
unique_lock<mutex> lock(mtx);
c[x] += a[i] * b[x - i];
}
}
}));
}
for (int i = 0; i < thr.size(); ++i) {
thr[i].join();
}
}
return c;
}
int main() {
ifstream fin("input.in");
int t;
fin >> t;
int n;
fin >> n;
vector<int> a, b;
for (int i = 0; i < n; ++i) {
int x;
fin >> x;
a.push_back(x);
}
for (int i = 0; i < n; ++i) {
int x;
fin >> x;
b.push_back(x);
}
fin.close();
vector<int> res = solve(a, b, t);
for (auto it : res) {
cout << it;
}
return 0;
}
6. Write a parallel program that computes the prime numbers up to N. It is assumed to have the list of primes up to √N, and will check each of the other numbers if it is divisible with a number from the initial list.
#include <iostream>
#include <fstream>
#include <vector>
#include <thread>
#include <cmath>
#include <mutex>
using namespace std;
mutex mtx;
bool is_prime(int num, const vector<int> &primes) {
int sqrt_num = sqrt(num);
for (int prime : primes) {
if (prime > sqrt_num)
break;
if (num % prime == 0)
return false;
}
return true;
}
void find_primes(int start, int end, const vector<int> &initial_primes, vector<int> &result) {
for (int i = start; i <= end; ++i) {
if (is_prime(i, initial_primes)) {
lock_guard<mutex> lock(mtx);
result.push_back(i);
}
}
}
int main() {
ifstream fin("input.in");
int t;
fin >> t;
int n;
fin >> n;
int sqrt_n = sqrt(n);
vector<int> initial_primes;
vector<int> primes;
primes.push_back(2);
initial_primes.push_back(2);
for (int i = 3; i <= sqrt_n; i += 2) {
if (is_prime(i, initial_primes)) {
initial_primes.push_back(i);
primes.push_back(i);
}
}
vector<thread> threads;
vector<vector<int>> thread_results(t);
for (int i = 0; i < t; ++i) {
int start = sqrt_n + 1 + (i * (n - sqrt_n)) / t;
int end = sqrt_n + 1 + ((i + 1) * (n - sqrt_n)) / t - 1;
threads.push_back(thread(find_primes, start, end, ref(initial_primes), ref(thread_results[i])));
}
for (auto& thread : threads) {
thread.join();
}
for (int i = 0; i < t; ++i) {
primes.insert(primes.end(), thread_results[i].begin(), thread_results[i].end());
}
for (int prime : primes) {
cout << prime << " ";
}
cout << endl;
return 0;
}
7. Write a parallel that computes the discrete convolution of a vector with another vector. The convolution is defined as: r(i) = sum(a(i) * b(i - j), j = 0..n)
#include <iostream>
#include <vector>
#include <thread>
using namespace std;
inline void solve(vector <int> a, vector <int> b, int T) {
vector <thread> threads;
int n = a.size();
vector <int> sol(n, 0);
for(int idx = 0; idx < T; ++ idx) {
threads.push_back(thread([a, b, idx, n, &sol, T](){
for(int i = idx; i < n; i += T) {
for(int j = 0; j < n; ++ j) {
sol[i] += a[j] * b[(i - j + n) % n];
}
}
}));
}
for(int i = 0; i < threads.size(); ++ i) {
threads[i].join();
}
for(auto it : sol) {
cerr << it << '\\n';
}
}
int main() {
solve({1, 2, 3}, {1, 2, 3}, 3);
// r[0] = a[0] * b[0] + a[1] * b[2] + a[2] * b[1] = 1 * 1 + 2 * 3 + 3 * 2 = 1 + 6 + 6 = 13
// r[1] = a[0] * b[1] + a[1] * b[0] + a[2] * b[2] = 1 * 2 + 2 * 1 + 3 * 3 = 2 + 2 + 9 = 13
// r[2] = a[0] * b[2] + a[1] * b[1] + a[2] * b[0] = 1 * 3 + 2 * 2 + 3 * 1 = 3 + 4 + 3 = 10
}
8. Write a parallel program that counts the number of permutations of N
that satisfy a given property. You have a function (bool pred(vector <int> const& v)
) that verifies if a given permutation satisfies the property. Your program shall call that function once for each permutation and count the number of times it returns true.
#include <iostream>
#include <thread>
#include <vector>
#include <atomic>
using namespace std;
bool check(vector <int> v) {
if(v[0] % 2 == 0) {
return true;
}
return false;
}
bool contains(vector <int> v, int n) {
for(auto it : v) {
if(it == n) {
return true;
}
}
return false;
}
atomic <int> cnt;
void back(vector <int> sol, int T, int n) {
if(sol.size() == n) {
if(check(sol)) {
cnt ++;
}
return;
}
if(T == 1) {
for (int i = 1; i <= n; ++ i) {
if(contains(sol, i)) continue;
sol.push_back(i);
back(sol, T, n);
sol.pop_back();
}
} else {
vector <int> x(sol);
thread t([&](){
for(int i = 1; i <= n; i += 2) {
if(contains(x, i)) continue;
x.push_back(i);
back(x, T / 2, n);
x.pop_back();
}
});
for(int i = 2; i <= n; i += 2) {
if(contains(sol, i)) continue;
sol.push_back(i);
back(sol, T - T / 2, n);
sol.pop_back();
}
t.join();
}
}
int main() {
back(vector <int>(), 2, 3);
cout << cnt.load() << '\\n';
}
9. Write a parallel program that counts the number of subsets of k
out of N
that satisfy a given property. You have a function (bool pred(vector <int> const& v)
) that verifies if a given subset satisfies the property. Your program shall call that function once for each subset of k
elements and count the number of times it returns true.
#include <iostream>
#include <fstream>
#include <vector>
#include <thread>
#include <atomic>
using namespace std;
atomic <int> cnt;
inline bool check(vector <int> const &v) {
if(v.size() == 0) {
return false;
}
return v[0] % 2 == 0;
}
inline void generate(vector <int> v, int k, int n, int T) {
if(v.size() == k) {
for(auto it : v) {
cerr << it << ' ';
}
cerr << '\\n';
if(check(v)) {
cnt ++;
}
return ;
}
int lst = 0;
if(v.size() > 0) {
lst = v.back();
}
if(T == 1) {
for(int i = lst + 1; i <= n; ++ i) {
v.push_back(i);
generate(v, k, n, T);
v.pop_back();
}
} else {
thread t([&]() {
vector <int> newv(v);
for(int i = lst + 1; i <= n; i += 2) {
newv.push_back(i);
generate(newv, k, n, T / 2);
newv.pop_back();
}
});
vector <int> aux(v);
for(int i = lst + 2; i <= n; i += 2) {
aux.push_back(i);
generate(aux, k, n, T - T / 2);
aux.pop_back();
}
t.join();
}
}
int main() {
generate(vector <int> (), 3, 5, 2);
cout << cnt << '\\n';
}