From fbfee74d89fa011f3f9ea1abe276d2fea99a351a Mon Sep 17 00:00:00 2001 From: "Reza Rafati Bonab (pma753)" <reza.rafati@usask.ca> Date: Fri, 6 Sep 2024 14:16:54 -0600 Subject: [PATCH] Add code Files --- Makefile | 30 ++++++++ SW-Actor.cpp | 185 ++++++++++++++++++++++++++++++++++++++++++++++++++ config.hpp | 22 ++++++ job.sh | 24 +++++++ makePairs.cpp | 16 +++++ makePairs.hpp | 5 ++ readFasta.cpp | 33 +++++++++ readFasta.hpp | 7 ++ 8 files changed, 322 insertions(+) create mode 100644 Makefile create mode 100644 SW-Actor.cpp create mode 100644 config.hpp create mode 100644 job.sh create mode 100644 makePairs.cpp create mode 100644 makePairs.hpp create mode 100644 readFasta.cpp create mode 100644 readFasta.hpp diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..5d8f32c --- /dev/null +++ b/Makefile @@ -0,0 +1,30 @@ +# Compiler +CXX = g++ + +# Compiler flags +CXXFLAGS = -std=c++17 -O3 -I/globalhome/pma753/HPC/root/install/include + +# Linker flags +LDFLAGS = -L/globalhome/pma753/HPC/root/install/lib64 + +# Libraries +LIBS = -lcaf_core -lcaf_io + +# Source files +SRCS = SW-Actor.cpp readFasta.cpp makePairs.cpp + +# Target executable +TARGET = swActor + +# Set library path +export LD_LIBRARY_PATH := /globalhome/pma753/HPC/root/install/lib64:$(LD_LIBRARY_PATH) + +# Build rule +all: $(TARGET) + +$(TARGET): $(SRCS) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $(TARGET) $(SRCS) $(LIBS) + +# Clean rule +clean: + rm -f $(TARGET) diff --git a/SW-Actor.cpp b/SW-Actor.cpp new file mode 100644 index 0000000..6fd8794 --- /dev/null +++ b/SW-Actor.cpp @@ -0,0 +1,185 @@ +/* +A program that reads Multiple sequences from a fasta file and calls the +Smith-Waterman algorithm to align any possible pair of them Using Actor Model. +The aligned sequences and the alignment score are then printed to the output +file. + */ +// include the necessary libraries +#include "config.hpp" + +using namespace std; +using namespace caf; + +// The Manager Actor +behavior workerActor(event_based_actor *self) +{ + // recive the messages + return {[=](actor manager, int position) + { + // Set the scores + int matchScore = 2; + int mismatchScore = -1; + int gapScore = -2; + + // Get the length of the sequences + int index1 = workList1[position]; + int index2 = workList2[position]; + string seq1 = sequences[index1]; + string seq2 = sequences[index2]; + int m = seq1.length(); + int n = seq2.length(); + + // Create a matrix to store the scores + vector<vector<int>> scoreMatrix(m + 1, vector<int>(n + 1, 0)); + + // Fill the matrix and find the max score + int maxScore = 0; + int indexFirst, indexSecond; + + // loop through the matrix row by row + for (int i = 1; i <= m; ++i) + { + for (int j = 1; j <= n; ++j) + { + // find 3 scores for adjacent cells according to the SW algorithm + int diagonal = scoreMatrix[i - 1][j - 1] + (seq1[i - 1] == seq2[j - 1] ? matchScore : mismatchScore); + int left = scoreMatrix[i - 1][j] + gapScore; + int above = scoreMatrix[i][j - 1] + gapScore; + // take the maximum of the three scores and 0 + scoreMatrix[i][j] = max({0, diagonal, left, above}); + + // update the max score and the corresponding indices + if (scoreMatrix[i][j] > maxScore) + { + maxScore = scoreMatrix[i][j]; + indexFirst = i; + indexSecond = j; + } + } + } + + // Traceback and find the aligned sequences + string alignedSeq1, alignedSeq2; + int i = indexFirst; + int j = indexSecond; + + // start from the cell with the max score and loop until a cell with score 0 is reached + while (i > 0 || j > 0) + { + // get the current score and the scores of the adjacent cells + int score = scoreMatrix[i][j]; + // check termination condition + if (score == 0) + { + break; + } + // check different strategies for updating the aligned sequences + // match/mismatch case + int diagonal = scoreMatrix[i - 1][j - 1]; + if (score == diagonal + (seq1[i - 1] == seq2[j - 1] ? matchScore : mismatchScore)) + { + alignedSeq1 = seq1[i - 1] + alignedSeq1; + alignedSeq2 = seq2[j - 1] + alignedSeq2; + --i; + --j; + continue; + } + // gap for first sequence cases + int above = scoreMatrix[i - 1][j]; + if (score == above + gapScore) + { + alignedSeq1 = seq1[i - 1] + alignedSeq1; + alignedSeq2 = '-' + alignedSeq2; + --i; + continue; + } + // gap for second sequence cases + int left = scoreMatrix[i][j - 1]; + if (score == left + gapScore) + { + alignedSeq1 = '-' + alignedSeq1; + alignedSeq2 = seq2[j - 1] + alignedSeq2; + --j; + continue; + } + } + + // write the aligned sequences and the score to the output file + self->println("Pair: {}, {} with score: {}, \nAligned seq1: {}, \nAligned seq2: {}\n", + workList1[position], workList2[position], maxScore, alignedSeq1, alignedSeq2); + + // send message to the manager to request new work + anon_mail(self, position, maxScore).send(manager); + }}; +} + +// Define the state of the Manager Actor +struct state_full_actor +{ + int position = 0; + int counter = 0; +}; +// The Manager Actor +behavior managerActor(stateful_actor<state_full_actor> *self) +{ + // recive the messages + return { + [=](int actorNumber) + { + // Spawn the worker Actors and send them the position in the workList + for (int i = 0; i < actorNumber; ++i) + { + actor worker = self->spawn(workerActor); + anon_mail(self, i).send(worker); + } + self->state().position = actorNumber - 1; + }, + [=](actor sender, int position, int score) + { + // update the position and the counter + self->state().position++; + self->state().counter++; + + // check if there is more work to do + if (self->state().position < workList1.size()) + { + // send the next work to the worker + anon_mail(self, self->state().position).send(sender); + } + + if (self->state().counter == workList1.size()) + { + // Stop the timer + auto stop = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast<std::chrono::seconds>(stop - start); + self->println("Time taken by function: {} seconds\n", duration.count()); + self->quit(); + } + }, + }; +} + +// Main now takes a config object as an additional argument +void caf_main(actor_system &system) +{ + // Read sequences from the input file + sequences = readFasta("/globalhome/pma753/HPC/Two-Level-Test/Data/BRCA1.fasta"); + + // Create a list of all possible pairs of sequences + vector<vector<int>> paires = makePairs(sequences.size()); + workList1 = paires[0]; + workList2 = paires[1]; + // Start the timer + start = std::chrono::high_resolution_clock::now(); + + // Span the Manager Actor + actor manager = system.spawn(managerActor); + + // Send a message to the manager with actorNumber to start the process + int actorNumber = 40; + anon_mail(actorNumber).send(manager); + + // The rest of the process is being don inside the managerActor +} + +CAF_MAIN() \ No newline at end of file diff --git a/config.hpp b/config.hpp new file mode 100644 index 0000000..7270a7f --- /dev/null +++ b/config.hpp @@ -0,0 +1,22 @@ +#ifndef CONFIG_HPP +#define CONFIG_HPP + +#include "caf/all.hpp" +#include <vector> +#include <string> +#include <chrono> +#include <utility> +#include <algorithm> + +// include the necessary headers +#include "readFasta.hpp" +#include "makePairs.hpp" + +// Declare the global variables +std::vector<std::string> sequences; +std::vector<std::pair<int, int>> pairs; +std::vector<int> workList1; +std::vector<int> workList2; +auto start = std::chrono::high_resolution_clock::now(); + +#endif // CONFIG_HPP \ No newline at end of file diff --git a/job.sh b/job.sh new file mode 100644 index 0000000..58ab13b --- /dev/null +++ b/job.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#SBATCH --account=hpc_p_spiteri +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=40 +#SBATCH --mem=185GB +#SBATCH --time=01:00:00 +#SBATCH --constraint=skylake + + +# Navigate to the directory containing your MPI program +cd $SLURM_SUBMIT_DIR + +# Set LD_LIBRARY_PATH if needed +export LD_LIBRARY_PATH=/globalhome/pma753/HPC/root/install/lib64:$LD_LIBRARY_PATH + +make clean +make + +echo "40 cpus 40 actors" +echo "BRCA1.fasta" + +# Run your program +time ./swActor --caf.scheduler.max-threads=40 > outputjob.txt + \ No newline at end of file diff --git a/makePairs.cpp b/makePairs.cpp new file mode 100644 index 0000000..299a546 --- /dev/null +++ b/makePairs.cpp @@ -0,0 +1,16 @@ +#include "makePairs.hpp" + +std::vector<std::vector<int>> makePairs(int seqSize) +{ + std::vector<std::vector<int>> pairs(2); + // Create a list of all possible pairs of sequences + for (int i = 0; i < seqSize; ++i) + { + for (int j = i + 1; j < seqSize; ++j) + { + pairs[0].push_back(i); + pairs[1].push_back(j); + } + } + return pairs; +} diff --git a/makePairs.hpp b/makePairs.hpp new file mode 100644 index 0000000..260105e --- /dev/null +++ b/makePairs.hpp @@ -0,0 +1,5 @@ +#include <iostream> +#include <vector> + +// Define the makePairs function +std::vector<std::vector<int>> makePairs(int seqSize); diff --git a/readFasta.cpp b/readFasta.cpp new file mode 100644 index 0000000..31bf56d --- /dev/null +++ b/readFasta.cpp @@ -0,0 +1,33 @@ +#include "readFasta.hpp" + +// A function to read multiple sequences from a fasta file +std::vector<std::string> readFasta(std::string filename) +{ + // a vector to store the sequences + std::vector<std::string> sequences; + + // Read Multiple sequences from the input file + std::ifstream inputFile(filename); + std::string line; + std::string seq; + while (std::getline(inputFile, line)) + { + line.erase(line.find_last_not_of(" \t\r\n") + 1); + if (line[0] == '>') + { + if (!seq.empty()) + { + sequences.push_back(seq); + seq.clear(); + } + } + else + { + seq += line; + } + } + sequences.push_back(seq); + inputFile.close(); + + return sequences; +} diff --git a/readFasta.hpp b/readFasta.hpp new file mode 100644 index 0000000..9c1f98b --- /dev/null +++ b/readFasta.hpp @@ -0,0 +1,7 @@ +#include <iostream> +#include <fstream> +#include <string> +#include <vector> + +// A function to read multiple sequences from a fasta file +std::vector<std::string> readFasta(std::string filename); -- GitLab