/*

XP Motif

Java code to extract XP motifs, written for use in a KNIME Java node.

`motifs_found` will contain the results.

*/

import java.util.regex.*;
import java.util.Arrays;
import java.util.HashMap;



/**
 * Counts "XP" motifs -- one residue followed by a run of one to five prolines -- in the
 * trimmed, upper-cased input sequence.
 *
 * Motifs are tried from the longest proline run to the shortest and, within one run length,
 * in the residue order listed below. Every counted occurrence claims its positions, so a
 * position contributes to at most one motif. `motifs_found` maps each of the candidate
 * motifs to its count (0 when it never matched).
 *
 * NOTE(review): a null c_Maskedsequence still fails with a NullPointerException here; confirm
 * how missing input cells are supposed to be handled before adding a guard.
 */
public void snippet() throws TypeException, ColumnException, Abort {

// Locale.ROOT keeps the case mapping locale-independent; with a Turkish default locale
// toUpperCase() would turn 'i' into a dotted capital that never matches 'I'.
String seq = c_Maskedsequence.toUpperCase(java.util.Locale.ROOT).trim();
// claimed[i] is true once position i belongs to an already counted motif (defaults to false).
boolean[] claimed = new boolean[seq.length()];

HashMap<String,Integer> motifs_found = new HashMap<String,Integer>();
for (String base : new String[] { "PPPPP", "PPPP", "PPP", "PP", "P" }) {
	/*
	 * Note how proline is at the end of the list: this is VERY important. It means that 'PPPP' is considered less important to match
	 * than 'SPPP' whenever possible. This is a deliberate design decision as the HRGP family features this. For other families it may
	 * not hold.
	 */
	for (char aa : new char[] { 'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'Q', 'R', 'S', 'T', 'W', 'V', 'Y', 'P' }) {
		String motif = aa+base;
		int    cnt = 0;
		// The motif is a plain literal, so indexOf is enough; no regular expression is needed.
		int from = 0;
		int start;
		while ((start = seq.indexOf(motif, from)) >= 0) {
			int end = start + motif.length();
			boolean overlapping = false;
			for (int i=start; i<end; i++) {
				if (claimed[i]) {
					overlapping = true; break;
				}
			}
			if (overlapping) {
				// Retry one position further. Resuming after the rejected hit would skip
				// valid occurrences of all-proline motifs that start inside it, e.g. the
				// trailing "PP" of "APPPPPPP" once "APPPPP" has claimed positions 0-5.
				from = start + 1;
			} else {
				cnt++;
				Arrays.fill(claimed, start, end, true);
				from = end;
			}
		}


		motifs_found.put(motif, cnt);
	}
}

}
