/* XP Motif Java code to extract XP motifs, written for use in a KNIME Java node. `motifs_found` will contain the results. */ import java.util.regex.*; import java.util.Arrays; import java.util.HashMap; public void snippet() throws TypeException, ColumnException, Abort { String seq = c_Maskedsequence.toUpperCase().trim(); int[] vec = new int[seq.length()]; Arrays.fill(vec, 0); HashMap motifs_found = new HashMap(); for (String base : new String[] { "PPPPP", "PPPP", "PPP", "PP", "P" }) { /* * Note how proline as at the end of the list: this is VERY important. It means thats 'PPPP' is considered less important to match * than 'SPPP' whenever possible. This is a deliberate design decision as the HRGP family features this. For other families it may * not hold. */ for (char aa : new char[] { 'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'Q', 'R', 'S', 'T', 'W', 'V', 'Y', 'P' }) { String motif = aa+base; int cnt = 0; Pattern p = Pattern.compile(motif); Matcher m = p.matcher(seq); while (m.find()) { boolean overlapping = false; for (int i=m.start(); i 0) { overlapping = true; break; } } if (!overlapping) { cnt++; Arrays.fill(vec, m.start(), m.end(), 1); } } motifs_found.put(motif, cnt); } } }