PatientLevelPrediction
PatientLevelPrediction copied to clipboard
Code base refactor - Discussion
A place to discuss the refactor of PLP and to get an overview of the current code base and of the options for a prospective one. Currently the project is file-organized and function-based. Below is a "class" diagram of all files and functions in the R folder: each "class" stands for one file, and its members are the functions in that file.
classDiagram
class AdditionalCovariates {
createCohortCovariateSettings
getCohortCovariateData
}
class AndromedaHelperFunctions {
batchRestrict
calculatePrevs
limitCovariatesToPopulation
}
class CalibrationSummary {
getCalibrationSummary
getCalibrationSummary_binary
getCalibrationSummary_survival
}
class CovariateSummary {
aggregateCovariateSummaries
covariateSummary
covariateSummarySubset
createCovariateSubsets
getCovariatesForGroup
}
class CyclopsModels {
createCyclopsModel
filterCovariateIds
fitCyclopsModel
getCV
getVariableImportance
modelTypeToCyclopsModelType
predictCyclops
predictCyclopsType
reparamTransferCoefs
}
class CyclopsSettings {
setCoxModel
setIterativeHardThresholding
setLassoLogisticRegression
}
class DatabaseMigration {
getDataMigrator
migrateDataModel
}
class DataSplitting {
checkInputsSplit
createDefaultSplitSetting
dataSummary
randomSplitter
splitData
subjectSplitter
timeSplitter
}
class DemographicSummary {
getDemographicSummary
getDemographicSummary_binary
getDemographicSummary_survival
}
class DiagnosePlp {
cos_sim
diagnoseMultiplePlp
diagnosePlp
getDiagnostic
getMaxEndDaysFromCovariates
getOutcomeSummary
probastDesign
probastOutcome
probastParticipants
probastPredictors
}
class EvaluatePlp {
evaluatePlp
modelBasedConcordance
}
class EvaluationSummary {
aucWithCi
aucWithoutCi
averagePrecision
brierScore
calculateEStatisticsBinary
calibrationInLarge
calibrationInLargeIntercept
calibrationLine
calibrationWeak
computeAuc
getEvaluationStatistics
getEvaluationStatistics_binary
getEvaluationStatistics_survival
ici
}
class ExternalValidatePlp {
createValidationDesign
createValidationSettings
externalValidateDbPlp
externalValidatePlp
validateExternal
validateModel
}
class ExtractData {
createDatabaseDetails
createRestrictPlpDataSettings
getPlpData
print.plpData
print.summary.plpData
summary.plpData
}
class FeatureEngineering {
calculateStratifiedMeans
createFeatureEngineeringSettings
createRandomForestFeatureSelection
createSplineSettings
createStratifiedImputationSettings
createUnivariateFeatureSelection
featureEngineer
imputeMissingMeans
randomForestFeatureSelection
splineCovariates
splineMap
stratifiedImputeCovariates
univariateFeatureSelection
}
class FeatureImportance {
permute
permutePerf
pfi
}
class Fit {
fitPlp
}
class Formatting {
checkRam
MapIds
toSparseM
}
class GradientBoostingMachine {
fitXgboost
predictXgboost
setGradientBoostingMachine
varImpXgboost
}
class HelperFunctions {
configurePython
createTempModelLoc
cut2
ensure_installed
getOs
is_installed
listAppend
nrow
nrow.default
nrow.tbl
removeInvalidString
setPythonEnvironment
}
class ImportFromCsv {
extractCohortDefinitionsCSV
extractDatabaseListCSV
extractDiagnosticFromCsv
extractObjectFromCsv
getModelDesignCsv
getModelDesignSettingTable
getPerformanceEvaluationCsv
getTableNamesPlp
insertCsvToDatabase
}
class KNN {
fitKNN
predictKnn
setKNN
}
class LearningCurve {
createLearningCurve
getTrainFractions
lcWrapper
learningCurveHelper
plotLearningCurve
}
class LightGBM {
fitLightGBM
predictLightGBM
setLightGBM
varImpLightGBM
}
class Logging {
checkFileExists
closeLog
createLog
createLogSettings
}
class ParamChecks {
checkBoolean
checkHigher
checkHigherEqual
checkInStringVector
checkIsClass
checkLower
checkLowerEqual
checkNotNull
}
class PatientLevelPrediction
class Plotting {
outcomeSurvivalPlot
plotDemographicSummary
plotF1Measure
plotGeneralizability
plotPlp
plotPrecisionRecall
plotPredictedPDF
plotPredictionDistribution
plotPreferencePDF
plotSmoothCalibration
plotSmoothCalibrationLoess
plotSmoothCalibrationRcs
plotSparseCalibration
plotSparseCalibration2
plotSparseRoc
plotVariableScatterplot
}
class PopulationSettings {
createStudyPopulation
createStudyPopulationSettings
getCounts
getCounts2
}
class Predict {
applyFeatureengineering
applyTidyCovariateData
predictPlp
}
class PredictionDistribution {
getPredictionDistribution
getPredictionDistribution_binary
getPredictionDistribution_survival
}
class PreprocessingData {
createPreprocessSettings
preprocessData
}
class RClassifier {
applyCrossValidationInR
fitRclassifier
}
class Recalibration {
inverseLog
logFunct
recalibratePlp
recalibratePlpRefit
recalibrationInTheLarge
weakRecalibration
}
class RunMultiplePlp {
convertToJson
createModelDesign
loadPlpAnalysesJson
runMultiplePlp
savePlpAnalysesJson
validateMultiplePlp
}
class RunPlp {
runPlp
}
class RunPlpHelpers {
checkInputs
createDefaultExecuteSettings
createExecuteSettings
printHeader
}
class Sampling {
createSampleSettings
overSampleData
sameData
sampleData
underSampleData
}
class SaveLoadPlp {
applyMinCellCount
extractDatabaseToCsv
getPlpSensitiveColumns
loadPlpData
loadPlpModel
loadPlpResult
loadPlpShareable
loadPrediction
removeCellCount
removeList
saveModelPart
savePlpData
savePlpModel
savePlpResult
savePlpShareable
savePrediction
}
class Simulation {
simulatePlpData
}
class SklearnClassifier {
checkPySettings
computeGridPerformance
fitPythonModel
fitSklearn
gridCvPython
predictPythonSklearn
predictValues
}
class SklearnClassifierHelpers {
listCartesian
}
class SklearnClassifierSettings {
AdaBoostClassifierInputs
DecisionTreeClassifierInputs
GaussianNBInputs
MLPClassifierInputs
RandomForestClassifierInputs
setAdaBoost
setDecisionTree
setMLP
setNaiveBayes
setRandomForest
setSVM
SVCInputs
}
class SklearnToJson {
deSerializeAdaboost
deSerializeCsrMatrix
deSerializeDecisionTree
deSerializeMlp
deSerializeNaiveBayes
deSerializeRandomForest
deSerializeSVM
deSerializeTree
serializeAdaboost
serializeCsrMatrix
serializeDecisionTree
serializeMLP
serializeNaiveBayes
serializeRandomForest
serializeSVM
serializeTree
sklearnFromJson
sklearnToJson
}
class ThresholdSummary {
accuracy
checkToByTwoTableInputs
diagnosticOddsRatio
f1Score
falseDiscoveryRate
falseNegativeRate
falseOmissionRate
falsePositiveRate
getThresholdSummary
getThresholdSummary_binary
getThresholdSummary_survival
negativeLikelihoodRatio
negativePredictiveValue
positiveLikelihoodRatio
positivePredictiveValue
sensitivity
specificity
stdca
}
class uploadToDatabase {
addCohort
addDatabase
addModel
addMultipleRunPlpToDatabase
addRunPlpToDatabase
checkJson
checkTable
cleanNum
createDatabaseList
createDatabaseSchemaSettings
createPlpResultTables
deleteTables
enc
getCohortDef
getPlpResultTables
getResultLocations
insertModelInDatabase
insertResultsToSqlite
insertRunPlpToSqlite
}
class uploadToDatabaseDiagnostics {
addDiagnosePlpToDatabase
addDiagnostic
addMultipleDiagnosePlpToDatabase
addResultTable
insertDiagnosisToDatabase
}
class uploadToDatabaseModelDesign {
addCovariateSetting
addFESetting
addModelDesign
addModelSetting
addPlpDataSetting
addPopulationSetting
addSampleSetting
addSplitSettings
addTar
addTidySetting
insertModelDesignInDatabase
insertModelDesignSettings
orderJson
}
class uploadToDatabasePerformance {
addAttrition
addCalibrationSummary
addCovariateSummary
addDemographicSummary
addEvaluation
addEvaluationStatistics
addPerformance
addPredictionDistribution
addThresholdSummary
checkResultExists
getColumnNames
insertPerformanceInDatabase
}
class ViewShinyPlp {
viewDatabaseResultPlp
viewMultiplePlp
viewPlp
viewPlps
}
Related resources: Draft PR for new model API: https://github.com/OHDSI/PatientLevelPrediction/pull/462