package at.tugraz.genome.biojava.seq.greengenes;

import at.tugraz.genome.biojava.db.FormatDefinitionInterface;
import at.tugraz.genome.biojava.io.GenericEntry;
import at.tugraz.genome.biojava.seq.BioSequenceParserInterface;
import at.tugraz.genome.biojava.seq.fasta.FastaSequence;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.batik.util.XMLConstants;

/* loaded from: input_file:at/tugraz/genome/biojava/seq/greengenes/GreenGenesParser.class */
/**
 * Parser for GreenGenes entries: text records made of {@code key=value} lines,
 * optionally framed by lines starting with {@code BEGIN} and {@code END}.
 *
 * <p>Besides building {@link GreenGenesSequence} objects from a whole entry, the
 * parser can pull the accession ({@code prokMSA_id}) and the description
 * ({@code prokMSAname}) straight out of a raw string via regular expressions,
 * and can convert an entry into a {@link FastaSequence}.
 */
public class GreenGenesParser implements BioSequenceParserInterface<GreenGenesSequence> {
    /** Captures the non-whitespace run following {@code prokMSA_id=}. */
    private static final Pattern ACCESSION_PATTERN = Pattern.compile(".*prokMSA_id=([^\\s]*).*");

    /** Captures the non-whitespace run following {@code prokMSAname=}. */
    private static final Pattern DESCRIPTION_PATTERN = Pattern.compile(".*prokMSAname=([^\\s]*).*");

    /** Key of the line holding the entry accession. */
    private static final String KEY_ACCESSION = "prokMSA_id";

    /** Key of the line holding the aligned sequence. */
    private static final String KEY_SEQUENCE = "aligned_seq";

    /** Key of the line holding the versioned NCBI accession. */
    private static final String KEY_NCBI_ACCESSION = "ncbi_acc_w_ver";

    /** Key of the line holding the entry name. */
    private static final String KEY_NAME = "prokMSAname";

    /**
     * Searches {@code str} for {@code pattern} and returns the requested capture group.
     *
     * <p>The original implementation ignored both {@code pattern} and {@code i} and always
     * matched the accession pattern, so description lookups returned the accession.
     *
     * @param pattern the pattern to search for
     * @param str     the text to search; must not be {@code null}
     * @param i       index of the capture group to return
     * @return a single-element array holding the captured group, or {@code null} when
     *         the pattern is not found
     */
    private String[] matchPattern(Pattern pattern, String str, int i) {
        Matcher matcher = pattern.matcher(str);
        if (matcher.find()) {
            return new String[]{matcher.group(i)};
        }
        return null;
    }

    /**
     * Extracts the value following {@code prokMSA_id=} from the given text.
     *
     * @param str the text to search; must not be {@code null}
     * @return a single-element array with the accession, or {@code null} if none is found
     */
    @Override // at.tugraz.genome.biojava.seq.BioSequenceParserInterface
    public String[] getAccessions(String str) {
        return matchPattern(ACCESSION_PATTERN, str, 1);
    }

    /**
     * Returns the entry ids, which for this format are the accessions.
     *
     * @param str the text to search; must not be {@code null}
     * @return the result of {@link #getAccessions(String)}
     */
    @Override // at.tugraz.genome.biojava.io.GenericEntryParserInterface
    public String[] getIds(String str) {
        return getAccessions(str);
    }

    /**
     * Extracts the value following {@code prokMSAname=} from the given text.
     *
     * @param str the text to search; must not be {@code null}
     * @return a single-element array with the description, or {@code null} if none is found
     */
    @Override // at.tugraz.genome.biojava.seq.BioSequenceParserInterface
    public String[] getDescriptions(String str) {
        return matchPattern(DESCRIPTION_PATTERN, str, 1);
    }

    /**
     * @return a new {@link GreenGenesFormatDefinition} instance
     */
    @Override // at.tugraz.genome.biojava.io.GenericEntryParserInterface
    public FormatDefinitionInterface getFormatDefinition() {
        return new GreenGenesFormatDefinition();
    }

    /**
     * @return the value of {@code GreenGenesFormatDefinition.TYPE}
     */
    public int getType() {
        return GreenGenesFormatDefinition.TYPE;
    }

    /**
     * Parses a raw entry into a {@link GreenGenesSequence}.
     *
     * <p>The entry is split on {@code '\n'}. Lines whose trimmed text starts with
     * {@code BEGIN} or {@code END}, and lines that do not split into at least two
     * parts on the separator, are skipped. The {@code prokMSA_id} line sets the
     * accession, the {@code aligned_seq} line sets the sequence, and every other
     * line is stored as an attribute whose value is everything after the first
     * separator.
     *
     * @param str the entry text, may be {@code null}
     * @return the parsed sequence, or {@code null} when {@code str} is {@code null}
     */
    @Override // at.tugraz.genome.biojava.io.GenericEntryParserInterface
    public GreenGenesSequence parseEntry(String str) {
        if (str == null) {
            return null;
        }
        GreenGenesSequence sequence = new GreenGenesSequence();
        for (String line : str.split("\n")) {
            if (isFrameLine(line)) {
                continue;
            }
            // NOTE(review): XML_EQUAL_SIGN is presumably "=" - confirm against Batik.
            String[] parts = line.split(XMLConstants.XML_EQUAL_SIGN);
            if (parts.length < 2) {
                continue;
            }
            String key = parts[0];
            if (KEY_ACCESSION.equals(key)) {
                sequence.setAccession(parts[1]);
            } else if (KEY_SEQUENCE.equals(key)) {
                sequence.setSequence(parts[1]);
            } else {
                // Keep everything after the first separator so values that themselves
                // contain the separator are not truncated. Two parts from the split
                // guarantee the separator is present in the line.
                int separatorAt = line.indexOf(XMLConstants.XML_EQUAL_SIGN);
                sequence.addAttribute(key, line.substring(separatorAt + 1));
            }
        }
        return sequence;
    }

    /**
     * Parses the content of a {@link GenericEntry}.
     *
     * @param genericEntry the entry to parse, may be {@code null}
     * @return the result of {@link #parseEntry(String)} on the entry content, or
     *         {@code null} when {@code genericEntry} is {@code null}
     */
    @Override // at.tugraz.genome.biojava.io.GenericEntryParserInterface
    public GreenGenesSequence parseEntry(GenericEntry genericEntry) {
        if (genericEntry == null) {
            return null;
        }
        return parseEntry(genericEntry.getContent());
    }

    /**
     * Converts a raw entry into a {@link FastaSequence}.
     *
     * <p>The header is {@code ">" + prokMSA_id + " " + ncbi_acc_w_ver + " " + prokMSAname},
     * where a missing NCBI accession or name contributes an empty string (both
     * separating spaces are always written). The sequence is the {@code aligned_seq} value.
     *
     * @param str the entry text, may be {@code null}
     * @return the FASTA sequence, or {@code null} when {@code str} is {@code null} or the
     *         entry lacks a {@code prokMSA_id} or an {@code aligned_seq} line
     */
    public FastaSequence parseFastaSequenceFromEntry(String str) {
        if (str == null) {
            return null;
        }
        String alignedSequence = null;
        String accession = null;
        String ncbiAccession = null;
        String name = null;
        for (String line : str.split("\n")) {
            if (isFrameLine(line)) {
                continue;
            }
            String[] parts = line.split(XMLConstants.XML_EQUAL_SIGN);
            if (parts.length < 2) {
                continue;
            }
            String key = parts[0];
            if (KEY_ACCESSION.equals(key)) {
                accession = parts[1];
            } else if (KEY_SEQUENCE.equals(key)) {
                alignedSequence = parts[1];
            } else if (KEY_NCBI_ACCESSION.equals(key)) {
                ncbiAccession = parts[1];
            } else if (KEY_NAME.equals(key)) {
                name = parts[1];
            }
        }
        if (accession == null || alignedSequence == null) {
            return null;
        }
        StringBuilder header = new StringBuilder(">");
        header.append(accession);
        header.append(" ");
        if (ncbiAccession != null) {
            header.append(ncbiAccession);
        }
        header.append(" ");
        if (name != null) {
            header.append(name);
        }
        return new FastaSequence(header.toString(), alignedSequence);
    }

    /** Tells whether the trimmed line starts with one of the BEGIN/END frame markers. */
    private static boolean isFrameLine(String line) {
        String trimmed = line.trim();
        return trimmed.startsWith("BEGIN") || trimmed.startsWith("END");
    }
}
