1. Write a parallel program that computes the sum of all elements in a matrix. Use a binary tree for computing the sum.

#include <iostream>
#include <fstream>
#include <vector>
#include <thread>
#include <mutex>

using namespace std;

typedef vector<vector<int>> mat;
mutex mtx;

int compute_sum(const mat &matrix, int start_row, int end_row, int start_col, int end_col) {
    int sum = 0;
    for (int i = start_row; i < end_row; ++i) {
        for (int j = start_col; j < end_col; ++j) {
            sum += matrix[i][j];
        }
    }
    return sum;
}

void calculate_tree_sum(const mat &matrix, int start_row, int end_row, int start_col, int end_col, int &result) {
    if (start_row == end_row) {
        result = compute_sum(matrix, start_row, end_row, start_col, end_col);
        return;
    }

    if (t == 1) {
        result = compute_sum(matrix, start_row, end_row, start_col, end_col);
        return;
    }

    int mid_row = (start_row + end_row) / 2;
    int mid_col = (start_col + end_col) / 2;

    vector<thread> threads(4);
    vector<int> sub_results(4);

    threads[0] = thread([&]() { calculate_tree_sum(matrix, start_row, mid_row, start_col, mid_col, sub_results[0]); });
    threads[1] = thread([&]() { calculate_tree_sum(matrix, start_row, mid_row, mid_col, end_col, sub_results[1]); });
    threads[2] = thread([&]() { calculate_tree_sum(matrix, mid_row, end_row, start_col, mid_col, sub_results[2]); });
    threads[3] = thread([&]() { calculate_tree_sum(matrix, mid_row, end_row, mid_col, end_col, sub_results[3]); });

    for (auto& thread : threads) {
        thread.join();
    }

    
    for (int sub_sum : sub_results) {
        mtx.lock(); 
        result += sub_sum;
        mtx.unlock();
    }
}

int main() {
    ifstream fin("input.in");
    int n, m;
    fin >> n >> m;
    mat matrix(n, vector<int>(m));
    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < m; ++j) {
            fin >> matrix[i][j];
        }
    }

    int sum = 0;
    calculate_tree_sum(matrix, 0, n, 0, m, t, sum);
    cout << sum << endl;

    return 0;
}

2. Write a parallel program that computes the scalar product of 2 vectors of the same length. Use a binary tree for computing the sum.

#include <iostream>
#include <fstream>
#include <vector>
#include <thread>
#include <mutex>

using namespace std;

mutex mtx;

int compute_scalar_product(const vector<int> &a, const vector<int> &b, int start, int end) {
    int product = 0;
    for (int i = start; i < end; ++i) {
        product += a[i] * b[i];
    }
    return product;
}

void calculate_tree_scalar_product(const vector<int> &a, const vector<int> &b, int start, int end, int &product) {
    if (start == end) {
				mtx.lock();
        product = a[start] * b[start];
				mtx.unlock();
        return;
    }

    int mid = (start + end) / 2;

    vector<thread> threads(2);
    vector<int> sub_results(2);

    threads[0] = thread([&]() { calculate_tree_scalar_product(a, b, start, mid, sub_results[0]); });
    threads[1] = thread([&]() { calculate_tree_scalar_product(a, b, mid + 1, end, sub_results[1]); });

    for (auto& thread : threads) {
        thread.join();
    }
		mtx.lock();
    for (int sub_product : sub_results) {
        product += sub_product;
    }
		mtx.unlock();
}

int main() {
    ifstream fin("input.in");
    int n;
    fin >> n;
    vector<int> a(n);
    vector<int> b(n);
    for (int i = 0; i < n; ++i) {
        fin >> a[i];
    }
    for (int i = 0; i < n; ++i) {
        fin >> b[i];
    }

    int product = 0;
    calculate_tree_scalar_product(a, b, 0, n - 1, product);

    cout << product << endl;

    return 0;
}

3. Write a parallel program that computes the scalar product of 2 vectors of the same length. The program shall use a number of threads specified as input.

#include <iostream>
#include <fstream>
#include <vector>
#include <thread>
#include <mutex>

using namespace std;

mutex mtx;

int compute_scalar_product(const vector<int> &a, const vector<int> &b, int start, int end) {
    int product = 0;
    for (int i = start; i < end; ++i) {
        product += a[i] * b[i];
    }
    return product;
}

void calculate_tree_scalar_product(const vector<int> &a, const vector<int> &b, int start, int end, int t, int &product) {
    if (start == end) {
				mtx.lock();
        product = a[start] * b[start];
				mtx.unlock();
        return;
    }

    if (t == 1) {
				mtx.lock();
        product = compute_scalar_product(a, b, start, end);
				mtx.unlock();
        return;
    }

    int mid = (start + end) / 2;

    vector<thread> threads(2);
    vector<int> sub_results(2);

    threads[0] = thread([&]() { calculate_tree_scalar_product(a, b, start, mid, t / 2, sub_results[0]); });
    threads[1] = thread([&]() { calculate_tree_scalar_product(a, b, mid + 1, end, t / 2, sub_results[1]); });

    for (auto& thread : threads) {
        thread.join();
    }
		mtx.lock();
    for (int sub_product : sub_results) {
        product += sub_product;
    }
		mtx.unlock();
}

int main() {
    ifstream fin("input.in");
    int t;
    fin >> t;
    int n;
    fin >> n;
    vector<int> a(n);
    vector<int> b(n);
    for (int i = 0; i < n; ++i) {
        fin >> a[i];
    }
    for (int i = 0; i < n; ++i) {
        fin >> b[i];
    }

    int product = 0;
    calculate_tree_scalar_product(a, b, 0, n - 1, t, product);

    cout << product << endl;

    return 0;
}

4. Write a parallel program that computes the n-th power of a square matrix. The program shall use a number of threads specified as input.

#include <iostream>
#include <fstream>
#include <vector>
#include <thread>
#include <cmath>
#include <mutex>

using namespace std;

typedef vector<vector<int>> Mat;

void printMatrix(const Mat& matrix) {
    for (size_t i = 0; i < matrix.size(); i++) {
        for (size_t j = 0; j < matrix[i].size(); j++) {
            cout << matrix[i][j] << " ";
        }
        cout << '\\n';
    }
    cout << "\\n";
}

int computePoint(const Mat& a, const Mat& b, size_t i, size_t j) {
    int sum = 0;
    for (size_t k = 0; k < a.size(); k++) {
        sum += a[i][k] * b[k][j];
    }
    return sum;
}

vector<pair<size_t, size_t>> splitWorkload(size_t n, int t) {
    vector<pair<size_t, size_t>> intervals;
    size_t index = 0;
    size_t step = n / t;
    int mod = n % t;
    while (index < n) {
        intervals.push_back(make_pair(index, index + step + (mod > 0)));
        index += step + (mod > 0);
        mod--;
    }
    return intervals;
}

Mat multiply(const Mat& a, const Mat& b, int t) {
    int size = a.size();
    Mat result(size, vector<int>(size, 0));
    vector<thread> threads;
    vector<pair<size_t, size_t>> intervals = splitWorkload(size, t);
    for (int idx = 0; idx < t; idx++) {
        threads.push_back(
            thread([intervals, &a, &b, idx, &result]() {
                for (int i = intervals[idx].first; i < intervals[idx].second; i++) {
                    for (int j = 0; j < a.size(); j++) {
                        result[i][j] = computePoint(a, b, i, j);
                    }
                }
            })
        );
    }
    for (int i = 0; i < t; i++) {
        threads[i].join();
    }
    return result;
}

Mat pow(const Mat& a, int n, int t) {
    if (n == 1) {
        return a;
    }
    Mat temp = pow(a, n / 2, t);
    if (n % 2 == 0) {
        return multiply(temp, temp, t);
    } else {
        return multiply(a, multiply(temp, temp, t), t);
    }
}

int main() {
    ifstream fin("input.in");
    int t;
    fin >> t;
    int n;
    fin >> n;
    Mat matrix(n, vector<int>(n));
    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < n; ++j) {
            fin >> matrix[i][j];
        }
    }
    int exponent;
    fin >> exponent;
    Mat result(n, vector<int>(n, 0));
    if (exponent < 0) {
        cout << "Exponent should not be negative." << endl;
    } else {
        result = pow(matrix, exponent, t);
        printMatrix(result);
    }

    return 0;
}

5. Write a parallel program that computes the product of 2 matrices. The program shall use a number of threads specified as input.

#include <iostream>
#include <fstream>
#include <vector>
#include <thread>

using namespace std;

void multiply_matrices(const vector<vector<int>>& a, const vector<vector<int>>& b, vector<vector<int>>& c, int start, int end) {
    int n = a.size();
    int m = a[0].size();
    int p = b[0].size();

    for (int i = start; i < end; ++i) {
        for (int j = 0; j < n; ++j) {
            for (int k = 0; k < m; ++k) {
                c[i][j] += a[i][k] * b[k][j];
            }
        }
    }
}

int main() {
    ifstream fin("input.in");
    int t;
    fin >> t;
    int n;
    fin >> n;

    vector<vector<int>> a(n, vector<int>(n));
    vector<vector<int>> b(n, vector<int>(n));
    vector<vector<int>> c(n, vector<int>(n, 0));

    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < n; ++j) {
            fin >> a[i][j];
        }
    }

    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < n; ++j) {
            fin >> b[i][j];
        }
    }

    vector<thread> threads;

    for (int i = 0; i < t; ++i) {
        int start = (i * n) / t;
        int end = ((i + 1) * n) / t;
        threads.push_back(thread(multiply_matrices, ref(a), ref(b), ref(c), start, end));
    }

    for (int i = 0; i < t; ++i) {
        threads[i].join();
    }

    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < n; ++j) {
            cout << c[i][j] << ' ';
        }
        cout << '\\n';
    }

    return 0;
}

6. Write a parallel program that computes the product of two big numbers. The program shall use a number of threads specified as input.

#include <iostream>
#include <fstream>
#include <vector>
#include <thread>

using namespace std;

vector <int> solve(vector <int> a, vector <int> b, int T) {
  int n = a.size();
  int m = 2 * n - 1;
	vector <int> c;
  c.resize(m, 0);
  vector <thread> the;
  for(int idx = 0; idx < T; ++idx) {
    thr.push_back(thread([&, idx](){
      for(int x = idx; x < m; x += T) {
        for(int i = 0; i < n; ++i) {
          if(x - i >= n || x - i < 0) {
            continue;
          }
          c[x] += a[i] * b[x - i];
        }
      }
    }));
  }

  for(int i = 0; i < thr.size(); ++ i) {
    thr[i].join();
  }

  return c;
}

int main() {
  ifstream fin("input.in");
	int t;
	fin >> t;
  int n;
  fin >> n;
	vector <int> a, b;
  for(int i = 0; i < n; ++ i) {
    int x;
    fin >> x;
    a.push_back(x);
  }
  for(int i = 0; i < n; ++ i) {
    int x;
    fin >> x;
    b.push_back(x);
  }
	res = solve(a, b, t); 
  for(auto it: res) {
    cout << it;
  }
}

7. Write a parallel program that computes the prime numbers up to N. It is assumed to have the list of primes up to √N, and will check each of the other numbers if it is divisible with a number from the initial list.

#include <iostream>
#include <fstream>
#include <vector>
#include <thread>
#include <cmath>
#include <mutex>

using namespace std;

mutex mtx;

bool is_prime(int num, const vector<int> &primes) {
    int sqrt_num = sqrt(num);

    for (int prime : primes) {
        if (prime > sqrt_num)
            break;

        if (num % prime == 0)
            return false;
    }

    return true;
}

void find_primes(int start, int end, const vector<int> &initial_primes, vector<int> &result) {
    for (int i = start; i <= end; ++i) {
        if (is_prime(i, initial_primes)) {
            lock_guard<mutex> lock(mtx);
            result.push_back(i);
        }
    }
}

int main() {
    ifstream fin("input.in");
    int t;
    fin >> t;
    int n;
    fin >> n;
    int sqrt_n = sqrt(n);
    vector<int> initial_primes;
    vector<int> primes;
    primes.push_back(2);

    initial_primes.push_back(2);

    for (int i = 3; i <= sqrt_n; i += 2) {
        if (is_prime(i, initial_primes)) {
            initial_primes.push_back(i);
            primes.push_back(i);
        }
    }

    vector<thread> threads;
    vector<vector<int>> thread_results(t);

    for (int i = 0; i < t; ++i) {
        int start = sqrt_n + 1 + (i * (n - sqrt_n)) / t;
        int end = sqrt_n + 1 + ((i + 1) * (n - sqrt_n)) / t - 1;

        threads.push_back(thread(find_primes, start, end, ref(initial_primes), ref(thread_results[i])));
    }

    for (auto& thread : threads) {
        thread.join();
    }

    for (int i = 0; i < t; ++i) {
        primes.insert(primes.end(), thread_results[i].begin(), thread_results[i].end());
    }

    for (int prime : primes) {
        cout << prime << " ";
    }
    cout << endl;

    return 0;
}


8. Write a parallel program for solving the k-coloring problem. That is, you are given a number k, and n objects and some pairs among them that have distinct colors. Find a solution to color them with at most n colors in total, if one exists. Assume you have a function that gets a vector with n integers representing the assignment of colors to objects and checks if the constraits are obeyed or not.