From fbfee74d89fa011f3f9ea1abe276d2fea99a351a Mon Sep 17 00:00:00 2001
From: "Reza Rafati Bonab (pma753)" <reza.rafati@usask.ca>
Date: Fri, 6 Sep 2024 14:16:54 -0600
Subject: [PATCH] Add code Files

---
 Makefile      |  30 ++++++++
 SW-Actor.cpp  | 185 ++++++++++++++++++++++++++++++++++++++++++++++++++
 config.hpp    |  22 ++++++
 job.sh        |  24 +++++++
 makePairs.cpp |  16 +++++
 makePairs.hpp |   5 ++
 readFasta.cpp |  33 +++++++++
 readFasta.hpp |   7 ++
 8 files changed, 322 insertions(+)
 create mode 100644 Makefile
 create mode 100644 SW-Actor.cpp
 create mode 100644 config.hpp
 create mode 100644 job.sh
 create mode 100644 makePairs.cpp
 create mode 100644 makePairs.hpp
 create mode 100644 readFasta.cpp
 create mode 100644 readFasta.hpp

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..5d8f32c
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,30 @@
+# Compiler
+CXX = g++
+
+# Compiler flags
+CXXFLAGS = -std=c++17 -O3 -I/globalhome/pma753/HPC/root/install/include
+
+# Linker flags
+LDFLAGS = -L/globalhome/pma753/HPC/root/install/lib64
+
+# Libraries
+LIBS = -lcaf_core -lcaf_io
+
+# Source files
+SRCS = SW-Actor.cpp readFasta.cpp makePairs.cpp
+
+# Target executable
+TARGET = swActor
+
+# Set library path
+export LD_LIBRARY_PATH := /globalhome/pma753/HPC/root/install/lib64:$(LD_LIBRARY_PATH)
+
+# Build rule
+all: $(TARGET)
+
+$(TARGET): $(SRCS)
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $(TARGET) $(SRCS) $(LIBS)
+
+# Clean rule
+clean:
+	rm -f $(TARGET)
diff --git a/SW-Actor.cpp b/SW-Actor.cpp
new file mode 100644
index 0000000..6fd8794
--- /dev/null
+++ b/SW-Actor.cpp
@@ -0,0 +1,185 @@
+/*
+A program that reads Multiple sequences from a fasta file and calls the
+Smith-Waterman algorithm to align any possible pair of them Using Actor Model.
+The aligned sequences and the alignment score are then printed to the output
+file.
+ */
+// include the necessary libraries
+#include "config.hpp"
+
+using namespace std;
+using namespace caf;
+
+// The Manager Actor
+behavior workerActor(event_based_actor *self)
+{
+    // recive the messages
+    return {[=](actor manager, int position)
+            {
+                // Set the scores
+                int matchScore = 2;
+                int mismatchScore = -1;
+                int gapScore = -2;
+
+                // Get the length of the sequences
+                int index1 = workList1[position];
+                int index2 = workList2[position];
+                string seq1 = sequences[index1];
+                string seq2 = sequences[index2];
+                int m = seq1.length();
+                int n = seq2.length();
+
+                // Create a matrix to store the scores
+                vector<vector<int>> scoreMatrix(m + 1, vector<int>(n + 1, 0));
+
+                // Fill the matrix and find the max score
+                int maxScore = 0;
+                int indexFirst, indexSecond;
+
+                // loop through the matrix row by row
+                for (int i = 1; i <= m; ++i)
+                {
+                    for (int j = 1; j <= n; ++j)
+                    {
+                        // find 3 scores for adjacent cells according to the SW algorithm
+                        int diagonal = scoreMatrix[i - 1][j - 1] + (seq1[i - 1] == seq2[j - 1] ? matchScore : mismatchScore);
+                        int left = scoreMatrix[i - 1][j] + gapScore;
+                        int above = scoreMatrix[i][j - 1] + gapScore;
+                        // take the maximum of the three scores and 0
+                        scoreMatrix[i][j] = max({0, diagonal, left, above});
+
+                        // update the max score and the corresponding indices
+                        if (scoreMatrix[i][j] > maxScore)
+                        {
+                            maxScore = scoreMatrix[i][j];
+                            indexFirst = i;
+                            indexSecond = j;
+                        }
+                    }
+                }
+
+                // Traceback and find the aligned sequences
+                string alignedSeq1, alignedSeq2;
+                int i = indexFirst;
+                int j = indexSecond;
+
+                // start from the cell with the max score and loop until a cell with score 0 is reached
+                while (i > 0 || j > 0)
+                {
+                    // get the current score and the scores of the adjacent cells
+                    int score = scoreMatrix[i][j];
+                    // check termination condition
+                    if (score == 0)
+                    {
+                        break;
+                    }
+                    // check different strategies for updating the aligned sequences
+                    // match/mismatch case
+                    int diagonal = scoreMatrix[i - 1][j - 1];
+                    if (score == diagonal + (seq1[i - 1] == seq2[j - 1] ? matchScore : mismatchScore))
+                    {
+                        alignedSeq1 = seq1[i - 1] + alignedSeq1;
+                        alignedSeq2 = seq2[j - 1] + alignedSeq2;
+                        --i;
+                        --j;
+                        continue;
+                    }
+                    // gap for first sequence cases
+                    int above = scoreMatrix[i - 1][j];
+                    if (score == above + gapScore)
+                    {
+                        alignedSeq1 = seq1[i - 1] + alignedSeq1;
+                        alignedSeq2 = '-' + alignedSeq2;
+                        --i;
+                        continue;
+                    }
+                    // gap for second sequence cases
+                    int left = scoreMatrix[i][j - 1];
+                    if (score == left + gapScore)
+                    {
+                        alignedSeq1 = '-' + alignedSeq1;
+                        alignedSeq2 = seq2[j - 1] + alignedSeq2;
+                        --j;
+                        continue;
+                    }
+                }
+
+                // write the aligned sequences and the score to the output file
+                self->println("Pair: {}, {} with score: {}, \nAligned seq1: {}, \nAligned seq2: {}\n",
+                              workList1[position], workList2[position], maxScore, alignedSeq1, alignedSeq2);
+
+                // send message to the manager to request new work
+                anon_mail(self, position, maxScore).send(manager);
+            }};
+}
+
+// Define the state of the Manager Actor
+struct state_full_actor
+{
+    int position = 0;
+    int counter = 0;
+};
+// The Manager Actor
+behavior managerActor(stateful_actor<state_full_actor> *self)
+{
+    // recive the messages
+    return {
+        [=](int actorNumber)
+        {
+            // Spawn the worker Actors and send them the position in the workList
+            for (int i = 0; i < actorNumber; ++i)
+            {
+                actor worker = self->spawn(workerActor);
+                anon_mail(self, i).send(worker);
+            }
+            self->state().position = actorNumber - 1;
+        },
+        [=](actor sender, int position, int score)
+        {
+            // update the position and the counter
+            self->state().position++;
+            self->state().counter++;
+
+            // check if there is more work to do
+            if (self->state().position < workList1.size())
+            {
+                // send the next work to the worker
+                anon_mail(self, self->state().position).send(sender);
+            }
+
+            if (self->state().counter == workList1.size())
+            {
+                // Stop the timer
+                auto stop = std::chrono::high_resolution_clock::now();
+                auto duration = std::chrono::duration_cast<std::chrono::seconds>(stop - start);
+                self->println("Time taken by function: {} seconds\n", duration.count());
+                self->quit();
+            }
+        },
+    };
+}
+
+// Main now takes a config object as an additional argument
+void caf_main(actor_system &system)
+{
+    // Read sequences from the input file
+    sequences = readFasta("/globalhome/pma753/HPC/Two-Level-Test/Data/BRCA1.fasta");
+
+    // Create a list of all possible pairs of sequences
+    vector<vector<int>> paires = makePairs(sequences.size());
+    workList1 = paires[0];
+    workList2 = paires[1];
+    // Start the timer
+    start = std::chrono::high_resolution_clock::now();
+
+    // Span the Manager Actor
+    actor manager = system.spawn(managerActor);
+
+    // Send a message to the manager with actorNumber to start the process
+    int actorNumber = 40;
+    anon_mail(actorNumber).send(manager);
+
+    // The rest of the process is being don inside the managerActor
+}
+
+CAF_MAIN()
\ No newline at end of file
diff --git a/config.hpp b/config.hpp
new file mode 100644
index 0000000..7270a7f
--- /dev/null
+++ b/config.hpp
@@ -0,0 +1,22 @@
+#ifndef CONFIG_HPP
+#define CONFIG_HPP
+
+#include "caf/all.hpp"
+#include <vector>
+#include <string>
+#include <chrono>
+#include <utility>
+#include <algorithm>
+
+// include the necessary headers
+#include "readFasta.hpp"
+#include "makePairs.hpp"
+
+// Declare the global variables
+std::vector<std::string> sequences;
+std::vector<std::pair<int, int>> pairs;
+std::vector<int> workList1;
+std::vector<int> workList2;
+auto start = std::chrono::high_resolution_clock::now();
+
+#endif // CONFIG_HPP
\ No newline at end of file
diff --git a/job.sh b/job.sh
new file mode 100644
index 0000000..58ab13b
--- /dev/null
+++ b/job.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+#SBATCH --account=hpc_p_spiteri
+#SBATCH --nodes=1
+#SBATCH --cpus-per-task=40
+#SBATCH --mem=185GB
+#SBATCH --time=01:00:00
+#SBATCH --constraint=skylake
+
+
+# Navigate to the directory containing your MPI program
+cd $SLURM_SUBMIT_DIR
+
+# Set LD_LIBRARY_PATH if needed
+export LD_LIBRARY_PATH=/globalhome/pma753/HPC/root/install/lib64:$LD_LIBRARY_PATH
+
+make clean
+make
+
+echo "40 cpus 40 actors"
+echo "BRCA1.fasta"
+
+# Run your program
+time ./swActor --caf.scheduler.max-threads=40 > outputjob.txt
+ 
\ No newline at end of file
diff --git a/makePairs.cpp b/makePairs.cpp
new file mode 100644
index 0000000..299a546
--- /dev/null
+++ b/makePairs.cpp
@@ -0,0 +1,16 @@
+#include "makePairs.hpp"
+
+std::vector<std::vector<int>> makePairs(int seqSize)
+{
+    std::vector<std::vector<int>> pairs(2);
+    // Create a list of all possible pairs of sequences
+    for (int i = 0; i < seqSize; ++i)
+    {
+        for (int j = i + 1; j < seqSize; ++j)
+        {
+            pairs[0].push_back(i);
+            pairs[1].push_back(j);
+        }
+    }
+    return pairs;
+}
diff --git a/makePairs.hpp b/makePairs.hpp
new file mode 100644
index 0000000..260105e
--- /dev/null
+++ b/makePairs.hpp
@@ -0,0 +1,5 @@
+#include <iostream>
+#include <vector>
+
+// Define the makePairs function
+std::vector<std::vector<int>> makePairs(int seqSize);
diff --git a/readFasta.cpp b/readFasta.cpp
new file mode 100644
index 0000000..31bf56d
--- /dev/null
+++ b/readFasta.cpp
@@ -0,0 +1,33 @@
+#include "readFasta.hpp"
+
+// A function to read multiple sequences from a fasta file
+std::vector<std::string> readFasta(std::string filename)
+{
+    // a vector to store the sequences
+    std::vector<std::string> sequences;
+
+    // Read Multiple sequences from the input file
+    std::ifstream inputFile(filename);
+    std::string line;
+    std::string seq;
+    while (std::getline(inputFile, line))
+    {
+        line.erase(line.find_last_not_of(" \t\r\n") + 1);
+        if (line[0] == '>')
+        {
+            if (!seq.empty())
+            {
+                sequences.push_back(seq);
+                seq.clear();
+            }
+        }
+        else
+        {
+            seq += line;
+        }
+    }
+    sequences.push_back(seq);
+    inputFile.close();
+
+    return sequences;
+}
diff --git a/readFasta.hpp b/readFasta.hpp
new file mode 100644
index 0000000..9c1f98b
--- /dev/null
+++ b/readFasta.hpp
@@ -0,0 +1,7 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+
+// A function to read multiple sequences from a fasta file
+std::vector<std::string> readFasta(std::string filename);
-- 
GitLab