/* Motif and amino-acid bias (MAAB) pipeline Stage 2 of MAAB takes sequences with indicative HRGP compositional biases (AGP, EXT, PRP) and classifies into one of twenty-four MAAB classes (23 HRGP classes and a final MAAB class, with <15% known HRGP motifs). These stages are represented in the following 3 parts of Java code. Refer to Figure 3 of Johnson et al. (2016). */ // Stage 2, Part 1: Primary classification int stringency = 2; if (($%PAST$ >= $%PSKY$+stringency) && ($%PAST$ >= $%PVYK$+stringency)) { return "AGP"; } if (($%PSKY$ >= $%PAST$+stringency) && ($%PSKY$ >= $%PVYK$+stringency)) { return "Extensin"; } if (($%PVYK$ >= $%PAST$+stringency) && ($%PVYK$ >= $%PSKY$+stringency)) { return "PRP"; } return "Unknown"; // Stage 2, Part 2: Motif Analysis to Final classification of MAAB Classes 1 to 18 int n_agp = c_FullsequencenumberofAGPmotifsaccepted; double final_agp = n_agp / 2; // since AGP motifs are shorter than Ext/PRP motifs we "normalise" using this int n_ext = c_FullsequencenumberofExtensinmotifsaccepted; int n_prp = c_FullsequencenumberofPRPmotifsaccepted; String family = c_PrimaryClassification; boolean has_gpi = (c_HasGPIanchorprediction != null && c_HasGPIanchorprediction); int n_serpro = c_FullsequencenumberofextensinSPnmotifs; int n_tyro = c_Fullsequencenumberofextensinmotifswithtyrosine; double pc_hrgp_motifs = c_FullsequenceAGPmotifsnooverlapwithPRPorExtensinmotifspermitted + c_FullsequenceExtensinmotifsnooverlapswithPRPmotifspermitted + c_FullsequencePRPmotifs; if (pc_hrgp_motifs < 15.0d) { out_FinalClassification = "24 HRGP"; } double ext_motif_ratio = (n_tyro > 0) ? (((double)n_serpro)/n_tyro) : 100.0d; boolean is_ok_extensin = (n_tyro >= 2 && n_serpro >= 2) ? (ext_motif_ratio >= 0.25 && ext_motif_ratio <= 4.0d) : false; if (family.startsWith("AGP")) { if (final_agp >= n_ext && final_agp >= n_prp) { out_FinalClassification = has_gpi ? "1 Classical GPI-anchored AGP" : "4 Non GPI-anchored AGP"; } else { if (is_ok_extensin) { out_FinalClassification = "5 AGP Bias, high Ext (SPn & Y)"; } else if (ext_motif_ratio > 4.0d) { out_FinalClassification = "6 AGP Bias, high SPn mostly"; } else if (ext_motif_ratio < 0.25d) { out_FinalClassification = "7 AGP Bias, high Tyr mostly"; } else if (n_prp > 0) { out_FinalClassification = "8 AGP Bias, high PRP"; } else { out_FinalClassification = "ERROR"; } } } else if (family.startsWith("Ext")) { if (is_ok_extensin) { out_FinalClassification = has_gpi ? "9 GPI-anchored Extensin" : "2 Classical Extensin"; } else { if (n_prp > 0) { out_FinalClassification = "13 Ext Bias, high PRP"; } else if (ext_motif_ratio < 0.25d) { out_FinalClassification = "12 Ext Bias, high Tyr mostly"; } else if (ext_motif_ratio > 4.0d) { out_FinalClassification = "11 Ext Bias, high SPn mostly"; } else if (final_agp > 0) { out_FinalClassification = "10 Ext Bias, high AGP"; } else { out_FinalClassification = "ERROR"; } } } else if (family.startsWith("PRP")) { if (n_prp >= final_agp && n_prp >= n_ext) { out_FinalClassification = has_gpi ? "14 GPI-anchored PRP" : "3 Classical PRP"; } else { if (n_ext >= n_prp && (n_serpro >= 2 && n_tyro >= 2)) { out_FinalClassification = "16 PRP Bias, high Ext (SPn & Y)"; } else if (ext_motif_ratio > 4.0d) { out_FinalClassification = "17 PRP Bias, high SPn mostly"; } else if (ext_motif_ratio < 0.25d && n_serpro > 0) { out_FinalClassification = "18 PRP Bias, high Tyr mostly"; } else if (final_agp > n_prp && final_agp > n_ext) { out_FinalClassification = "15 PRP Bias, high AGP"; } else { out_FinalClassification = "ERROR"; } } } else { // mark anything else as HRGP which means it will be dealt with by the next step out_FinalClassification = "HRGP"; } // Stage 2, Part 3: Motif Analysis to Final classification of MAAB Classes 1 to 18 if ($Final classification$.equals("HRGP") || $Final classification$.equals("ERROR")) { int n_agp = $Full sequence: number of AGP motifs accepted$; int n_ext = $Full sequence: number of Extensin motifs accepted$; int n_prp = $Full sequence: number of PRP motifs accepted$; int final_agp= n_agp / 2; int n_serpro = $Full sequence: number of extensin SPn motifs$; int n_tyro = $Full sequence: number of extensin motifs with tyrosine$; double ext_motif_ratio = (n_tyro > 0) ? ((double)n_serpro) / n_tyro : 100.0d; if (final_agp > n_ext && final_agp > n_prp) { return "19 HRGP Bias, high AGP"; } else if (n_ext > n_prp && n_ext > final_agp && n_serpro >= 1 && n_tyro >= 1 && ext_motif_ratio >= 0.25d && ext_motif_ratio <= 4.0d) { return "20 HRGP Bias, high Ext (SPn & Y)"; } else if (n_prp > n_ext && n_prp > final_agp) { return "23 HRGP Bias, high PRP"; } else if (n_serpro > n_tyro && n_serpro >= 1 && ext_motif_ratio >= 4.0d) { return "21 HRGP Bias, high SPn mostly"; } else if (n_tyro > n_serpro && n_tyro >= 1 && ext_motif_ratio <= 0.25d) { return "22 HRGP Bias, high Tyr mostly"; } else { double pc_past = $%PAST$; double pc_psky = $%PSKY$; double pc_pvyk = $%PVYK$; if (pc_past > pc_psky && pc_past > pc_pvyk) { return "19 HRGP Bias, high AGP"; } else if (pc_psky > pc_past && pc_psky > pc_pvyk) { return "20 HRGP Bias, high Ext (SPn & Y)"; } else if (pc_pvyk > pc_past && pc_pvyk > pc_psky) { return "23 HRGP Bias, high PRP"; } else { return "ERROR - hrgp"; // Unclassified } } } return $Final classification$;