Skip to content
Snippets Groups Projects
Commit 4a85835e authored by Langella Olivier's avatar Langella Olivier
Browse files

WIP: first fdr filter test

parent 6031ba4c
No related branches found
No related tags found
No related merge requests found
......@@ -24,7 +24,7 @@ SET_TARGET_PROPERTIES(gp-grouping PROPERTIES OUTPUT_NAME gp-grouping)
INSTALL(PROGRAMS ${CMAKE_BINARY_DIR}/src/cpp/bin/gp-grouping DESTINATION bin)
# gp-output-proticdbml
QT5_WRAP_CPP( GP_OUTPUT_MOC_SRCS gpoutputproticdbml.h )
ADD_EXECUTABLE(gp-output-proticdbml gpoutputproticdbml.cpp
output/proticdbml.cpp input/groupinghandler.cpp input/xtandemsaxhandler.cpp ${GP_OUTPUT_MOC_SRCS})
......@@ -33,3 +33,18 @@ TARGET_LINK_LIBRARIES(gp-output-proticdbml ${GP_LIB_NAME} Qt5::Core ${PAPPSOMSPP
SET_TARGET_PROPERTIES(gp-output-proticdbml PROPERTIES OUTPUT_NAME gp-output-proticdbml)
INSTALL(PROGRAMS ${CMAKE_BINARY_DIR}/src/cpp/bin/gp-output-proticdbml DESTINATION bin)
# gp-fdr-filter
QT5_WRAP_CPP( GP_FDR_FILTER_MOC_SRCS gpfdrfilter.h )
ADD_EXECUTABLE(gp-fdr-filter
gpfdrfilter.cpp
input/peptideresultsqvaluehandler.cpp
${GP_FDR_FILTER_MOC_SRCS}
)
target_include_directories (gp-fdr-filter PUBLIC ${PAPPSOMSPP_INCLUDE_DIR} ${Qt5Core_INCLUDE_DIRS})
TARGET_LINK_LIBRARIES(gp-fdr-filter ${GP_LIB_NAME} Qt5::Core ${PAPPSOMSPP_QT5_LIBRARY})
SET_TARGET_PROPERTIES(gp-fdr-filter PROPERTIES OUTPUT_NAME gp-fdr-filter)
INSTALL(PROGRAMS ${CMAKE_BINARY_DIR}/src/cpp/bin/gp-fdr-filter DESTINATION bin)
......@@ -27,13 +27,9 @@
#include <iostream>
#include <pappsomspp/pappsoexception.h>
#include "gp_lib_config.h"
#include "gp_engine.h"
#include "gp_params.h"
#include "gp_error.h"
#include "gp_engine.h"
#include "gpoutputproticdbml.h"
#include "output/proticdbml.h"
#include "input/groupinghandler.h"
#include "gpfdrfilter.h"
#include "../libgroupingprotein/gp_error.h"
#include "input/peptideresultsqvaluehandler.h"
GpFdrFilter::GpFdrFilter(QObject *parent) : QObject(parent)
......@@ -64,6 +60,7 @@ GpFdrFilter::run()
//-o /tmp/test.fasta
QTextStream errorStream(stderr, QIODevice::WriteOnly);
QTextStream outStream(stdout, QIODevice::WriteOnly);
try
{
......@@ -92,7 +89,7 @@ GpFdrFilter::run()
QCoreApplication::translate("main", "fdr"));
parser.addOption(outputOption);
parser.addOption(inputOption);
parser.addOption(fdrOption);
qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
......@@ -108,24 +105,21 @@ GpFdrFilter::run()
const QStringList args = parser.positionalArguments();
QString groupingFileStr = parser.value(inputOption);
QString fastaFileStr = parser.value(fastaOption);
QString peptidesFileStr = parser.value(inputOption);
QString targetFdrStr = parser.value(fdrOption);
QString proticFileStr = parser.value(outputOption);
if(!proticFileStr.isEmpty())
if(!peptidesFileStr.isEmpty())
{
ProticdbMl output(proticFileStr);
GroupingHandler parser(&output, fastaFileStr);
PeptideResultsQvalueHandler parser;
qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
QXmlSimpleReader simplereader;
simplereader.setContentHandler(&parser);
simplereader.setErrorHandler(&parser);
QFile grouping_file(groupingFileStr);
QXmlInputSource xmlInputSource(&grouping_file);
QFile peptide_file(peptidesFileStr);
QXmlInputSource xmlInputSource(&peptide_file);
if(simplereader.parse(xmlInputSource))
{
......@@ -134,21 +128,23 @@ GpFdrFilter::run()
{
throw GpError(parser.errorString());
}
outStream << QString::number(
parser.getEvalueThresholdForFdr(targetFdrStr.toDouble()), 'f', 6);
}
qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
}
catch(GpError &gperror)
{
errorStream << "Oops! an error occurred in PAPPSO MS tools. Dont Panic :"
<< endl;
errorStream << "Oops! an error occurred in GP. Dont Panic :" << endl;
errorStream << gperror.qwhat() << endl;
exit(1);
app->exit(1);
}
catch(pappso::PappsoException &error)
{
errorStream << "Oops! an error occurred in PAPPSO MS tools. Dont Panic :"
<< endl;
errorStream << "Oops! an error occurred in GP. Dont Panic :" << endl;
errorStream << error.qwhat() << endl;
exit(1);
app->exit(1);
......@@ -156,8 +152,7 @@ GpFdrFilter::run()
catch(std::exception &error)
{
errorStream << "Oops! an error occurred in PAPPSO MS tools. Dont Panic :"
<< endl;
errorStream << "Oops! an error occurred in GP. Dont Panic :" << endl;
errorStream << error.what() << endl;
exit(1);
app->exit(1);
......
/*******************************************************************************
* Copyright (c) 2019 Olivier Langella <Olivier.Langella@u-psud.fr>.
*
* This file is part of Protein Grouper.
*
* Protein Grouper is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Protein Grouper is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Protein Grouper. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#include "peptideresultsqvaluehandler.h"
#include <pappsomspp/pappsoexception.h>
PeptideResultsQvalueHandler::PeptideResultsQvalueHandler()
{
m_regexpDecoy.setPattern(".*\\|reversed$");
}
PeptideResultsQvalueHandler::~PeptideResultsQvalueHandler()
{
}
bool
PeptideResultsQvalueHandler::startElement(const QString &namespaceURI,
const QString &localName,
const QString &qName,
const QXmlAttributes &attributes)
{
// qDebug() << namespaceURI << " " << localName << " " << qName ;
_tag_stack.push_back(qName);
bool is_ok = true;
try
{
// startElement_group
if(qName == "scan")
{
is_ok = startElement_scan(attributes);
}
else if(qName == "psm")
{
is_ok = startElement_psm(attributes);
}
_current_text.clear();
}
catch(pappso::PappsoException &exception_pappso)
{
_errorStr = QObject::tr(
"ERROR in PeptideResultsQvalueHandler::startElement tag "
"%1, PAPPSO exception:\n%2")
.arg(qName)
.arg(exception_pappso.qwhat());
return false;
}
catch(std::exception &exception_std)
{
_errorStr = QObject::tr(
"ERROR in PeptideResultsQvalueHandler::startElement tag "
"%1, std exception:\n%2")
.arg(qName)
.arg(exception_std.what());
return false;
}
return is_ok;
}
bool
PeptideResultsQvalueHandler::endElement(const QString &namespaceURI,
const QString &localName,
const QString &qName)
{
bool is_ok = true;
// endElement_peptide_list
try
{
if(qName == "scan")
{
is_ok = endElement_scan();
}
}
catch(pappso::PappsoException &exception_pappso)
{
_errorStr = QObject::tr(
"ERROR in PeptideResultsQvalueHandler::endElement tag %1, "
"PAPPSO exception:\n%2")
.arg(qName)
.arg(exception_pappso.qwhat());
return false;
}
catch(std::exception &exception_std)
{
_errorStr = QObject::tr(
"ERROR in PeptideResultsQvalueHandler::endElement tag %1, "
"std exception:\n%2")
.arg(qName)
.arg(exception_std.what());
return false;
}
_current_text.clear();
_tag_stack.pop_back();
return is_ok;
}
bool
PeptideResultsQvalueHandler::startDocument()
{
return true;
}
bool
PeptideResultsQvalueHandler::endDocument()
{
std::sort(m_scoreList.begin(),
m_scoreList.end(),
[](const PsmScore &scorea, PsmScore &scoreb) {
return (scorea.m_evalue < scoreb.m_evalue);
});
std::size_t count_decoy = 0;
std::size_t count_target = 0;
for(auto &score : m_scoreList)
{
if(score.m_isDecoy)
{
count_decoy++;
}
else
{
count_target++;
}
score.m_qvalue = count_decoy / count_target;
}
qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
// check reverse list to clean q-values
auto rit = m_scoreList.rbegin();
double qvalue_max = 99999999;
while(rit != m_scoreList.rend())
{
if(rit->m_qvalue > qvalue_max)
{
rit->m_qvalue = qvalue_max;
}
qvalue_max = rit->m_qvalue;
rit++;
}
qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
return true;
}
bool
PeptideResultsQvalueHandler::characters(const QString &str)
{
_current_text += str;
return true;
}
bool
PeptideResultsQvalueHandler::error(const QXmlParseException &exception)
{
_errorStr = QObject::tr(
"Parse error at line %1, column %2 :\n"
"%3")
.arg(exception.lineNumber())
.arg(exception.columnNumber())
.arg(exception.message());
return false;
}
bool
PeptideResultsQvalueHandler::fatalError(const QXmlParseException &exception)
{
_errorStr = QObject::tr(
"Parse error at line %1, column %2 :\n"
"%3")
.arg(exception.lineNumber())
.arg(exception.columnNumber())
.arg(exception.message());
return false;
}
QString
PeptideResultsQvalueHandler::errorString() const
{
return _errorStr;
}
bool
PeptideResultsQvalueHandler::startElement_scan(QXmlAttributes attrs)
{
bool is_ok = true;
m_oneScanScoreList.clear();
return is_ok;
}
bool
PeptideResultsQvalueHandler::startElement_psm(QXmlAttributes attributes)
{
QString psm_key(QString("%1-%2")
.arg(attributes.value("seq"))
.arg(attributes.value("mhTheo")));
auto ret = m_oneScanScoreList.insert(
std::pair<QString, PsmScore>(psm_key, PsmScore()));
ret.first->second.m_evalue = attributes.value("evalue").toDouble();
//|reversed
if(m_regexpDecoy.indexIn(attributes.value("prot"), 0) > -1)
{
ret.first->second.m_isDecoy = true;
}
return true;
}
bool
PeptideResultsQvalueHandler::endElement_scan()
{
bool is_ok = true;
for(auto &pair_scan_score : m_oneScanScoreList)
{
m_scoreList.push_back(pair_scan_score.second);
}
return is_ok;
}
double
PeptideResultsQvalueHandler::getEvalueThresholdForFdr(double target_fdr) const
{
double evalue = 0;
for(auto &score : m_scoreList)
{
if(score.m_qvalue > target_fdr)
{
return score.m_evalue;
}
}
return evalue;
}
/*******************************************************************************
* Copyright (c) 2019 Olivier Langella <Olivier.Langella@u-psud.fr>.
*
* This file is part of Protein Grouper.
*
* Protein Grouper is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Protein Grouper is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Protein Grouper. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#pragma once
#include <QXmlDefaultHandler>
struct PsmScore
{
double m_evalue = 0;
bool m_isDecoy = false;
double m_qvalue = 0;
};
class ProticdbMl;
class PeptideResultsQvalueHandler : public QXmlDefaultHandler
{
public:
PeptideResultsQvalueHandler();
virtual ~PeptideResultsQvalueHandler();
bool startElement(const QString &namespaceURI,
const QString &localName,
const QString &qName,
const QXmlAttributes &attributes) override;
bool endElement(const QString &namespaceURI,
const QString &localName,
const QString &qName) override;
bool startDocument() override;
bool endDocument() override;
bool characters(const QString &str) override;
bool fatalError(const QXmlParseException &exception) override;
bool error(const QXmlParseException &exception) override;
QString errorString() const override;
double getEvalueThresholdForFdr(double target_fdr) const;
private:
bool startElement_scan(QXmlAttributes attrs);
bool startElement_psm(QXmlAttributes attributes);
bool endElement_scan();
private:
std::vector<QString> _tag_stack;
QString _errorStr;
QString _current_text;
std::vector<PsmScore> m_scoreList;
std::map<QString, PsmScore> m_oneScanScoreList;
QRegExp m_regexpDecoy;
};
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment