seqan
seqan copied to clipboard
MyersBitVector (Banded) returns wrong Score in some cases.
The issue only seems to occure when length(reference) == length(pattern)
#include <seqan/basic.h>
#include <seqan/sequence.h>
#include <seqan/align.h>
#include <seqan/index.h>
#include <seqan/seq_io.h>
#include <iostream>
#include <seqan/align.h>
using namespace seqan;
using namespace std;
template<typename TString, typename TNeedle, typename TError>
inline bool verification(TString & text, TNeedle & needle, TError maxErrors)
{
typedef Finder<TString> TMyersFinder;
typedef PatternState_<TNeedle, Myers<AlignTextBanded<FindInfix, NMatchesNone_, NMatchesNone_>, True, void> > TPatternState;
TPatternState patternState;
typedef PatternState_<TNeedle, Myers<AlignTextBanded<FindPrefix, NMatchesNone_, NMatchesNone_>, True, void> > TPatternStatePrefix;
TPatternStatePrefix patternPrefixState;
uint8_t minErrors = maxErrors + 1;
std::cout << "Seqs: \n" << "reference: " << text << "\nneedle: " << needle << "\n";
TMyersFinder finder(text);
uint8_t mErrors = maxErrors * 4;
int startPos = 99;
while (find(finder, needle, patternPrefixState, -static_cast<int>(maxErrors * 4)))
{
int currentEnd = position(finder) + 1;
int currentErrors = -getScore(patternPrefixState);
std::cout << "Loop errors: " << currentErrors << "\tselected end: " << currentEnd << "\n";
if (currentErrors <= mErrors)
{
mErrors = currentErrors;
startPos = currentEnd;
}
}
std::cout << "BEST error: " << (int)mErrors << "\n";
int score = globalAlignmentScore(needle, text, MyersBitVector());
std::cout << "score global Alignment function :" << score << "\n";
}
int main(int argc, char const * argv[])
{
// The bug only occurres when length(reference) == length(needle);
Dna5String reference = "TAAATATTATT";
Dna5String needle = "TAAAATATTAT";
verification(reference, needle, 3);
// This works
// Dna5String needle = "TATTATAAAAT";
// Dna5String reference = "TTATTATAAAT";
return 0;
}
Cout:
Seqs:
reference: TAAATATTATT
needle: TAAAATATTAT
Loop errors: 4 selected end: 11
BEST error: 4
score global Alignment function :-2
One more thing I noticed, where always the wrong score is reported. Dna5String reference = "TAAATATTAT"; Dna5String needle __= "TAAATAATTAT"; TAAATA--TTAT |||||||||||||||...|||||||| TAAATAATTAT BEST error: 4 score global Alignment function :- 1 Needle aligns to an infix of the reference (the whole reference in this case). I am not if is possible for myers to report the correct score, since never the needle is longer than the reference.
Here is an even shorter example, it seem to be connect to band and having more Insertions than deletion in the needle: Dna5String reference = "ACGTA"; Dna5String needle = "ACCGT"; verification(reference, needle, 3);
Dna5String reference = "ACGTA";
Dna5String needle = "ACCGTA";
verification(reference, needle, 3);
The non-banded version works flawless:
#include <seqan/basic.h>
#include <seqan/sequence.h>
#include <seqan/align.h>
#include <seqan/index.h>
#include <seqan/seq_io.h>
#include <iostream>
#include <seqan/align.h>
using namespace seqan;
using namespace std;
template<typename TString, typename TNeedle, typename TError>
inline bool verification(TString & text, TNeedle & needle, TError maxErrors)
{
typedef Finder<TString> TMyersFinder;
typedef Pattern<TNeedle const, Myers<FindInfix> > TMyersPattern;
typedef typename PatternState<TMyersPattern>::Type TPatternState;
typedef Pattern<TNeedle const, Myers<FindPrefix> > TMyersPrefixPattern;
typedef typename PatternState<TMyersPrefixPattern>::Type TPatternPrefixState;
TPatternState patternState;
TPatternPrefixState patternPrefixState;
uint8_t minErrors = maxErrors + 1;
std::cout << "Seqs: \n" << "reference: " << text << "\nneedle: " << needle << "\n";
TMyersFinder finder(text);
TMyersPrefixPattern pattern = needle;
uint8_t mErrors = maxErrors * 4;
int startPos = 99;
// find(finder, pattern, patternState, 5);
while (find(finder, pattern, patternPrefixState, 5))
{
int currentEnd = position(finder) + 1;
int currentErrors = -getScore(patternPrefixState);
std::cout << "Loop errors: " << currentErrors << "\tselected end: " << currentEnd << "\n";
if (currentErrors <= mErrors)
{
mErrors = currentErrors;
startPos = currentEnd;
}
}
std::cout << "BEST error: " << (int)mErrors << "\n";
int score = globalAlignmentScore(needle, text, MyersBitVector());
std::cout << "score global Alignment function :" << score << "\n";
}
int main(int argc, char const * argv[])
{
{
Dna5String reference = "ACGTA";
Dna5String needle = "ACGGGGTA";
verification(reference, needle, 3);
}
return 0;
}
I guess this should be fixed in SeqAn3.