/*

Motif and amino-acid bias (MAAB) pipeline

Stage 2 of MAAB takes sequences with indicative HRGP compositional biases (AGP, EXT, PRP) and classifies into one of twenty-four MAAB classes
(23 HRGP classes and a final MAAB class, with <15% known HRGP motifs).

These stages are represented in the following 3 parts of Java code. Refer to Figure 3 of Johnson et al. (2016).

*/


// Stage 2, Part 1: Primary classification

int stringency = 2;

if (($%PAST$ >= $%PSKY$+stringency) &&
	($%PAST$ >= $%PVYK$+stringency)) {
	return "AGP";
}

if (($%PSKY$ >= $%PAST$+stringency) &&
	($%PSKY$ >= $%PVYK$+stringency)) {
	return "Extensin";
}

if (($%PVYK$ >= $%PAST$+stringency) &&
	($%PVYK$ >= $%PSKY$+stringency)) {
	return "PRP";
}

return "Unknown";




// Stage 2, Part 2: Motif Analysis to Final classification of MAAB Classes 1 to 18

int       n_agp = c_FullsequencenumberofAGPmotifsaccepted;
double   final_agp = n_agp / 2;        // since AGP motifs are shorter than Ext/PRP motifs we "normalise" using this
int       n_ext = c_FullsequencenumberofExtensinmotifsaccepted;
int       n_prp = c_FullsequencenumberofPRPmotifsaccepted;
String   family = c_PrimaryClassification;
boolean has_gpi = (c_HasGPIanchorprediction != null && c_HasGPIanchorprediction);
int    n_serpro = c_FullsequencenumberofextensinSPnmotifs;
int     n_tyro  = c_Fullsequencenumberofextensinmotifswithtyrosine;

double pc_hrgp_motifs = c_FullsequenceAGPmotifsnooverlapwithPRPorExtensinmotifspermitted +
            c_FullsequenceExtensinmotifsnooverlapswithPRPmotifspermitted +
            c_FullsequencePRPmotifs;

if (pc_hrgp_motifs < 15.0d) {
    out_FinalClassification = "24 HRGP";
}

double ext_motif_ratio = (n_tyro > 0) ? (((double)n_serpro)/n_tyro) : 100.0d;
boolean is_ok_extensin = (n_tyro >= 2 && n_serpro >= 2) ?
            (ext_motif_ratio >= 0.25 && ext_motif_ratio <= 4.0d) : false;

if (family.startsWith("AGP")) {
    if (final_agp >= n_ext && final_agp >= n_prp) {
        out_FinalClassification =  has_gpi ? "1 Classical GPI-anchored AGP" : "4 Non GPI-anchored AGP";
    } else {
        if (is_ok_extensin) {
            out_FinalClassification =  "5 AGP Bias, high Ext (SPn & Y)";
        } else if (ext_motif_ratio > 4.0d) {
            out_FinalClassification =  "6 AGP Bias, high SPn mostly";
        } else if (ext_motif_ratio < 0.25d) {
            out_FinalClassification =  "7 AGP Bias, high Tyr mostly";
        } else if (n_prp > 0) {
            out_FinalClassification =  "8 AGP Bias, high PRP";
        } else {
            out_FinalClassification =  "ERROR";
        }
    }
} else if (family.startsWith("Ext")) {
    if (is_ok_extensin) {
        out_FinalClassification =  has_gpi ? "9 GPI-anchored Extensin" : "2 Classical Extensin";
    } else {
        if (n_prp > 0) {
            out_FinalClassification =  "13 Ext Bias, high PRP";
        } else if (ext_motif_ratio < 0.25d) {
            out_FinalClassification =  "12 Ext Bias, high Tyr mostly";
        } else if (ext_motif_ratio > 4.0d) {
            out_FinalClassification =  "11 Ext Bias, high SPn mostly";
        } else if (final_agp > 0) {
            out_FinalClassification =  "10 Ext Bias, high AGP";
        } else {
            out_FinalClassification =  "ERROR";
        }
    }
} else if (family.startsWith("PRP")) {
    if (n_prp >= final_agp && n_prp >= n_ext) {
        out_FinalClassification =  has_gpi ? "14 GPI-anchored PRP" : "3 Classical PRP";
    } else {
        if (n_ext >= n_prp && (n_serpro >= 2 && n_tyro >= 2)) {
            out_FinalClassification =  "16 PRP Bias, high Ext (SPn & Y)";
        } else if (ext_motif_ratio > 4.0d) {
            out_FinalClassification =  "17 PRP Bias, high SPn mostly";
        } else if (ext_motif_ratio < 0.25d && n_serpro > 0) {
            out_FinalClassification =  "18 PRP Bias, high Tyr mostly";
        } else if (final_agp > n_prp && final_agp > n_ext) {
            out_FinalClassification =  "15 PRP Bias, high AGP";
        } else {
            out_FinalClassification =  "ERROR";
        }
    }
} else {
    // mark anything else as HRGP which means it will be dealt with by the next step
    out_FinalClassification =  "HRGP";
}


// Stage 2, Part 3: Motif Analysis to Final classification of MAAB Classes 1 to 18

if ($Final classification$.equals("HRGP") ||
    $Final classification$.equals("ERROR")) {

    int n_agp    = $Full sequence: number of AGP motifs accepted$;
    int n_ext    = $Full sequence: number of Extensin motifs accepted$;
    int n_prp    = $Full sequence: number of PRP motifs accepted$;
    int final_agp= n_agp / 2;
    int n_serpro = $Full sequence: number of extensin SPn motifs$;
    int n_tyro   = $Full sequence: number of extensin motifs with tyrosine$;
    double ext_motif_ratio = (n_tyro > 0) ? ((double)n_serpro) / n_tyro : 100.0d;

    if (final_agp > n_ext && final_agp > n_prp) {
        return "19 HRGP Bias, high AGP";
    } else if (n_ext > n_prp && n_ext > final_agp && n_serpro >= 1 && n_tyro >= 1 &&
		ext_motif_ratio >= 0.25d && ext_motif_ratio <= 4.0d) {
        return "20 HRGP Bias, high Ext (SPn & Y)";
    } else if (n_prp > n_ext && n_prp > final_agp) {
	return "23 HRGP Bias, high PRP";
    } else if (n_serpro > n_tyro && n_serpro >= 1 && ext_motif_ratio >= 4.0d) {
        return "21 HRGP Bias, high SPn mostly";
    } else if (n_tyro > n_serpro && n_tyro >= 1 && ext_motif_ratio <= 0.25d) {
        return "22 HRGP Bias, high Tyr mostly";
    } else {
	double pc_past = $%PAST$;
	double pc_psky = $%PSKY$;
	double pc_pvyk = $%PVYK$;
	if (pc_past > pc_psky && pc_past > pc_pvyk) {
		return "19 HRGP Bias, high AGP";
	} else if (pc_psky > pc_past && pc_psky > pc_pvyk) {
		return "20 HRGP Bias, high Ext (SPn & Y)";
	} else if (pc_pvyk > pc_past && pc_pvyk > pc_psky) {
		return "23 HRGP Bias, high PRP";
	} else {
		return "ERROR - hrgp";		// Unclassified
	}
    }
}
return $Final classification$;
