diff --git a/README.md b/README.md index 09af234dbe370df3ceaaf420bfd0e7f6cda8d3c4..f621b9d245490a2e9245bae173682d46fe3ee1e3 100644 --- a/README.md +++ b/README.md @@ -80,12 +80,11 @@ to control the thread pool size used by CAF's scheduler. --- ## Credits -The implementation of SW-Actors builds on the foundation provided by the [Smith–Waterman algorithm](https://icbi.i-med.ac.at/courses/bioinformatics_ex_2021/7265238.pdf) and the [C++ Actor Framework](https://github.com/actor-framework/actor-framework). We credit the original creators of the C++ Actor Framework which allowed us to implement the actor model into SUMMA-Actors. Links to their research work can be found below. +The implementation of SW-Actors builds on the foundation provided by the [Smith–Waterman algorithm](https://icbi.i-med.ac.at/courses/bioinformatics_ex_2021/7265238.pdf) and the [C++ Actor Framework](https://github.com/actor-framework/actor-framework). We credit the original creators of the C++ Actor Framework, which allowed us to implement the actor model in SW-Actors. Links to their research work can be found below. * Charousset, D., Schmidt, T. C., Hiesgen, R., Wählisch, M., 2013: Native actors: a scalable software platform for distributed, heterogeneous environments. _AGERE!_, [doi:10.1145/2541329.2541336](http://dx.doi.org/10.1145/2541329.2541336). * Charousset, D., Schmidt, T. C., Hiesgen, R., 2016: Revisiting actor programming in - C++. _Computer Languages, Systems & Structures_, [doi:10.1016/j.cl.2016.01.002](http:// - dx.doi.org/10.1016/j.cl.2016.01.002) \ No newline at end of file + C++. 
_Computer Languages, Systems & Structures_, [doi:10.1016/j.cl.2016.01.002](http://dx.doi.org/10.1016/j.cl.2016.01.002) \ No newline at end of file diff --git a/benchmarks/README.md b/benchmarks/README.md index 6581c5d2bae58f2d45257a5b94b58bfbfb032770..9e5cf908372898dc95d09e6ae00ef13c8cd7b5ec 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -114,5 +114,11 @@ sbatch SWIPE.sh - Ensure all dependencies (`Parasail`, `SWIPE`, `SeqAN`, etc.) are **installed and loaded** before running. +## Credits +The benchmarking setup presented here relies on the work and tools developed by the authors of **Parasail**, **SeqAn**, and **SWIPE**, whose implementations form the basis of our performance comparisons. We gratefully acknowledge their contributions to the bioinformatics community. +- **Parasail:** Daily, J., 2016. Parasail: SIMD C library for global, semi-global, and local pairwise sequence alignments. *BMC Bioinformatics*, [doi:10.1186/s12859-016-0930-z](https://doi.org/10.1186/s12859-016-0930-z) +- **SeqAn:** Reinert, K., et al., 2017. The SeqAn C++ template library for efficient sequence analysis: A resource for programmers. *Journal of Biotechnology*, [doi:10.1016/j.jbiotec.2017.07.017](https://doi.org/10.1016/j.jbiotec.2017.07.017) +- **SWIPE:** Rognes, T., 2011. Faster Smith–Waterman database searches with inter-sequence SIMD parallelisation. 
*BMC Bioinformatics*, [doi:10.1186/1471-2105-12-221](https://doi.org/10.1186/1471-2105-12-221) + --- diff --git a/build/swActor b/build/swActor index 8c92f96a000eca762c4696224b27b92d3768796d..3cbedd80bf1c712bbe030c4b629af4e57e604f3a 100755 Binary files a/build/swActor and b/build/swActor differ diff --git a/dataset /BRCA1.fasta b/dataset/BRCA1.fasta similarity index 100% rename from dataset /BRCA1.fasta rename to dataset/BRCA1.fasta diff --git a/dataset /BRCA2.fasta b/dataset/BRCA2.fasta similarity index 100% rename from dataset /BRCA2.fasta rename to dataset/BRCA2.fasta diff --git a/dataset /Heterogeneous .fasta b/dataset/Heterogeneous .fasta similarity index 100% rename from dataset /Heterogeneous .fasta rename to dataset/Heterogeneous .fasta diff --git a/dataset /Titin.fasta b/dataset/Titin.fasta similarity index 100% rename from dataset /Titin.fasta rename to dataset/Titin.fasta diff --git a/include/pairActor.hpp b/include/pairActor.hpp index efae9df19139e46b43c4f4e5b3053172050e0085..9dfa7bd7159b670a3315f0a5b3483e829d33ea10 100644 --- a/include/pairActor.hpp +++ b/include/pairActor.hpp @@ -22,6 +22,8 @@ namespace caf std::string alignedSeq2; actor manager; int position; + int maxLenQuery = 0; + int maxLenSubject = 0; }; behavior pairActor(stateful_actor<pairActorState> *self, int matchScore, int mismatchScore, int gapScore, int deviderRow, int deviderCol); diff --git a/include/serialActor.hpp b/include/serialActor.hpp index 9e6060c7c103b789dfaab9c2c82c1e661cfece9f..4e8b42178b29a6835bbc58b271601a7d04aafd77 100644 --- a/include/serialActor.hpp +++ b/include/serialActor.hpp @@ -11,6 +11,7 @@ namespace caf int mismatchScore = 0; int gapScore = 0; std::vector<std::vector<int>> scoreMatrix; + actor manager; }; behavior serialActor(stateful_actor<serialActorState> *self, int matchScore, int mismatchScore, int gapScore); diff --git a/multi.sh b/multi.sh index 55aa2e16d4d067a06ad48df2ae543d8c00939f1c..54e3da504719441bfc8723846f0c00547cdfdd40 100644 --- a/multi.sh +++ 
b/multi.sh @@ -13,7 +13,7 @@ IP=$(srun --nodes=1 --ntasks=1 hostname -i) # Run the code for node in $(scontrol show hostnames); do echo $node -export LD_LIBRARY_PATH := /path/to/your/library:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=/path/to/your/library:$LD_LIBRARY_PATH cd $SLURM_SUBMIT_DIR @@ -23,7 +23,7 @@ export LD_LIBRARY_PATH := /path/to/your/library:$LD_LIBRARY_PATH if [ $counter -eq 0 ]; then IP=$(srun --nodes=1 --ntasks=1 hostname -i) - srun --nodes=1 --ntasks=1 ./swActor -D -s -p 4444 -Q dataset/BRCA1.fasta -A 40 -m 2 -M -1 -g -2 -R 1 -C 1 --caf.scheduler.max-threads=40& + srun --nodes=1 --ntasks=1 ./swActor -D -s -p 4444 -Q ../dataset/BRCA1.fasta -A 40 -m 2 -M -1 -g -2 -R 1 -C 1 --caf.scheduler.max-threads=40& sleep 2 counter=$((counter + 1)) echo "lets do" diff --git a/single.sh b/single.sh index fa5a2b122a0103b42035d0504ec8a57ba1a726b1..b1c453b5a95bc0d59d74a9bf261a57e1ff480d11 100644 --- a/single.sh +++ b/single.sh @@ -9,7 +9,7 @@ cd $SLURM_SUBMIT_DIR -export LD_LIBRARY_PATH := /path/to/your/library:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=/path/to/your/library:$LD_LIBRARY_PATH make clean @@ -20,4 +20,4 @@ cd build # Run your program echo "BRCA1 1 node 40 cpu" -./swActor -Q dataset/BRCA1.fasta -A 40 -m 2 -M -1 -g -2 -R 1 -C 1 --caf.scheduler.max-threads=40 +./swActor -Q ../dataset/BRCA1.fasta -A 40 -m 2 -M -1 -g -2 -R 1 -C 1 --caf.scheduler.max-threads=40 diff --git a/src/actors/clientActor.cpp b/src/actors/clientActor.cpp index fee4ccbdb879775454f9312643f9ea2eaff83f71..8b9cf8dce6d1ef6843180c387ee10196443bc9e6 100644 --- a/src/actors/clientActor.cpp +++ b/src/actors/clientActor.cpp @@ -1,5 +1,4 @@ #include "clientActor.hpp" -#include "serialActor.hpp" #include "pairActor.hpp" namespace caf @@ -22,22 +21,11 @@ namespace caf self->state().server = *serverActor; - if (cfg.deviderRow == 1 && cfg.deviderCol == 1) + for (int i = 0; i < cfg.actorNumber; ++i) { - for (int i = 0; i < cfg.actorNumber; ++i) - { - actor worker = self->spawn(serialActor, 
cfg.matchScore, cfg.mismatchScore, cfg.gapScore); - anon_mail(self->state().server).send(worker); - } - } - else - { - for (int i = 0; i < cfg.actorNumber; ++i) - { - actor worker = self->spawn(pairActor, cfg.matchScore, cfg.mismatchScore, cfg.gapScore, - cfg.deviderRow, cfg.deviderCol); - anon_mail(self->state().server).send(worker); - } + actor worker = self->spawn(pairActor, cfg.matchScore, cfg.mismatchScore, cfg.gapScore, + cfg.deviderRow, cfg.deviderCol); + anon_mail(self->state().server).send(worker); } return {}; diff --git a/src/actors/managerActor.cpp b/src/actors/managerActor.cpp index 71e028fed2316b5146d3fc7354eab84981182f5c..b8a5975b542e6af8dd5d3a75907f1ed7936e646e 100644 --- a/src/actors/managerActor.cpp +++ b/src/actors/managerActor.cpp @@ -1,5 +1,4 @@ #include "managerActor.hpp" -#include "serialActor.hpp" #include "pairActor.hpp" #include "makePairs.hpp" #include "readFasta.hpp" @@ -48,36 +47,23 @@ namespace caf self->state().maxLenSubject = self->state().maxLenQuery; } - // Start the timer self->state().start = std::chrono::high_resolution_clock::now(); - if (cfg.deviderRow == 1 && cfg.deviderCol == 1) - { - for (int i = 0; i < cfg.actorNumber; ++i) - { - actor worker = self->spawn(serialActor, cfg.matchScore, cfg.mismatchScore, cfg.gapScore); + self->println("Query sequences: {}", self->state().querySequences.size()); + self->println("Subject sequences: {}", self->state().subjectSequences.size()); - std::string seq1 = self->state().querySequences[self->state().workList1[i]]; - std::string seq2 = self->state().subjectSequences[self->state().workList2[i]]; - - anon_mail(self->state().maxLenQuery, self->state().maxLenSubject).send(worker); - anon_mail(self, i, seq1, seq2).send(worker); - } - } - else + for (int i = 0; i < cfg.actorNumber; ++i) { - for (int i = 0; i < cfg.actorNumber; ++i) - { - actor worker = self->spawn(pairActor, cfg.matchScore, cfg.mismatchScore, cfg.gapScore, - cfg.deviderRow, cfg.deviderCol); + actor worker = 
self->spawn(pairActor, cfg.matchScore, cfg.mismatchScore, cfg.gapScore, + cfg.deviderRow, cfg.deviderCol); - self->state().start = std::chrono::high_resolution_clock::now(); - std::string seq1 = self->state().querySequences[self->state().workList1[i]]; - std::string seq2 = self->state().subjectSequences[self->state().workList2[i]]; + std::string seq1 = self->state().querySequences[self->state().workList1[i]]; + std::string seq2 = self->state().subjectSequences[self->state().workList2[i]]; - anon_mail(self, i, seq1, seq2).send(worker); - } + anon_mail(self->state().maxLenQuery, self->state().maxLenSubject).send(worker); + anon_mail(self, i, seq1, seq2).send(worker); } + self->state().position = cfg.actorNumber - 1; return { @@ -90,7 +76,7 @@ namespace caf { std::string seq1 = self->state().querySequences[self->state().workList1[self->state().position]]; std::string seq2 = self->state().subjectSequences[self->state().workList2[self->state().position]]; - anon_mail(self, self->state().position, seq1, seq2).send(sender); + anon_mail(self->state().position, seq1, seq2).send(sender); } else { diff --git a/src/actors/pairActor.cpp b/src/actors/pairActor.cpp index ff7b22e2793fcd3e3059c60a18d8632ea6c0abf2..594d74bc668e47b1f9695781cf5dfd9d2b527364 100644 --- a/src/actors/pairActor.cpp +++ b/src/actors/pairActor.cpp @@ -1,3 +1,4 @@ +#include "serialActor.hpp" #include "pairActor.hpp" #include "blockActor.hpp" @@ -19,11 +20,25 @@ namespace caf }, [=](actor manager, int position, std::string seq1, std::string seq2) { - actor blockAct = self->spawn(blockActor, actor_cast<actor>(self), 0, seq1, seq2, self->state().matchScore, self->state().mismatchScore, self->state().gapScore, self->state().deviderRow, self->state().deviderCol); + if (self->state().deviderRow == 1 && self->state().deviderCol == 1) + { + actor serialAct = self->spawn(serialActor, self->state().matchScore, self->state().mismatchScore, self->state().gapScore); + anon_mail(manager, self->state().maxLenQuery, 
self->state().maxLenSubject).send(serialAct); + anon_mail(position, seq1, seq2).send(serialAct); + } + else + { + actor blockAct = self->spawn(blockActor, actor_cast<actor>(self), 0, seq1, seq2, self->state().matchScore, self->state().mismatchScore, self->state().gapScore, self->state().deviderRow, self->state().deviderCol); + } self->state().manager = manager; self->state().position = position; self->state().workers.resize(self->state().deviderRow); }, + [=](int position, std::string seq1, std::string seq2) + { + actor blockAct = self->spawn(blockActor, actor_cast<actor>(self), 0, seq1, seq2, self->state().matchScore, self->state().mismatchScore, self->state().gapScore, self->state().deviderRow, self->state().deviderCol); + self->state().position = position; + }, [=](actor worker, int rowPose, int maxScore, int maxRow, int maxCol) { self->state().workers[rowPose] = worker; @@ -66,7 +81,11 @@ namespace caf anon_mail(row, col).send(self->state().workers[rowPose]); } }, - [=](int maxLenQuery, int maxLenSubject) {}, + [=](int maxLenQuery, int maxLenSubject) + { + self->state().maxLenQuery = maxLenQuery; + self->state().maxLenSubject = maxLenSubject; + }, [=](std::string exit) { self->quit(); diff --git a/src/actors/serialActor.cpp b/src/actors/serialActor.cpp index 944d6aad05a02437bc7afc1eb5b2d5e2d4783f0b..b9072a42beab0073aa05eef50e057b77a9070b1a 100644 --- a/src/actors/serialActor.cpp +++ b/src/actors/serialActor.cpp @@ -11,11 +11,12 @@ namespace caf // receive the messages return { - [=](int maxLenQuery, int maxLenSubject) + [=](actor manager, int maxLenQuery, int maxLenSubject) { self->state().scoreMatrix = std::vector<std::vector<int>>(maxLenQuery + 1, std::vector<int>(maxLenSubject + 1, 0)); + self->state().manager = manager; }, - [=](actor manager, int position, std::string seq1, std::string seq2) + [=](int position, std::string seq1, std::string seq2) { int m = seq1.length(); int n = seq2.length(); @@ -81,11 +82,7 @@ namespace caf } self->println("Pair: {}, 
with score: {}, \nAligned seq1: {}, \nAligned seq2: {}\n", position, maxScore, alignedSeq1, alignedSeq2); - anon_mail(self, position, maxScore).send(manager); - }, - [=](actor serverOrManager) - { - anon_mail(self).send(serverOrManager); + anon_mail(self, position, maxScore).send(self->state().manager); }, [=](std::string exit) {