KEGGのPathway MapのXMLをparseして,矢印の関係を書き出すJavaプログラム
KEGGのPathwayのデータ(ftp.genome.ad.jp/pub/kegg/xml/map 下の*.xmlとか,ftp.genome.ad.jp/pub/kegg/xml/organisms/*/*.xml とか)をparseして,KEGG上の図で矢印で現れている部分を表示するプログラム.
例えば `java KEGGRelation <KGMLファイル名>.xml` のように動かすと
ko:K05087 -> ko:K00922 (activation)
ko:K05087 -> ko:K02649 (activation)
ko:K04361 -> ko:K05859 (activation,phosphorylation)
ko:K04363 -> ko:K05859 (activation,phosphorylation)
というふうに,結果が返ってくる.
細かいエラーハンドリングは,していません.もちろん,DTDも無視しています.KGML 0.6.1以外で動くかどうかは,わかりません.
Bio** 使えって話もありますが,マニュアル見るより書いたほうが早かったので,書きました.
あと, 遺伝子(プロテイン)A,Bがcomplexで,遺伝子Cに向かって矢印が在る場合,A->C, B->Cの両方が出てきます.
import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Parses a KEGG pathway XML (KGML) file and prints every relation it declares
 * as one line per pair of names, in the form
 * {@code name1 -> name2 (subtype1,subtype2,...)}.
 *
 * <p>An {@code <entry>} whose {@code name} attribute lists several
 * whitespace-separated names yields one output line per name. An entry that
 * contains {@code <component>} children (a complex) is expanded into the
 * entries those components refer to, so a relation from a complex {A, B} to C
 * is printed as both {@code A -> C} and {@code B -> C}.
 *
 * <p>The document is not validated, and the external DTD named in a DOCTYPE
 * declaration is never loaded.
 */
public class KEGGRelation {

    /** Path of the KGML file to read. */
    private final String filename;

    /** Maps the numeric {@code id} attribute of each {@code <entry>} to its element. */
    private Map<Integer, Element> idToElement;

    /**
     * Creates a reader for the given file.
     *
     * @param string path of the KGML file to parse
     */
    public KEGGRelation(String string) {
        filename = string;
    }

    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} is the path of the KGML file; when it is
     *             missing a usage line is printed and the JVM exits with status 1
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.err.println("Usage: java KEGGRelation <KGML file>");
            System.exit(1);
        }
        KEGGRelation kegg = new KEGGRelation(args[0]);
        try {
            kegg.parse();
        } catch (Exception e) {
            e.printStackTrace();
            System.err.println("XML parse error");
        }
    }

    /**
     * Reads the file, indexes all {@code <entry>} elements and prints all
     * {@code <relation>} elements to standard output.
     *
     * @throws ParserConfigurationException if no DOM parser can be created
     * @throws SAXException if the file is not well-formed XML
     * @throws IOException if the file cannot be read
     */
    private void parse() throws ParserConfigurationException, SAXException, IOException {
        DocumentBuilderFactory dbfactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = dbfactory.newDocumentBuilder();
        // Resolve every external entity (including the DOCTYPE's DTD) to an
        // empty document: without this the parser tries to fetch the DTD,
        // which needs network access and lets the input pull in external
        // resources.
        builder.setEntityResolver(new EntityResolver() {
            @Override
            public InputSource resolveEntity(String publicId, String systemId) {
                return new InputSource(new StringReader(""));
            }
        });
        Document doc = builder.parse(new File(filename));
        Element root = doc.getDocumentElement();
        // parse <entry ...></entry> elements
        parseEntry(root.getElementsByTagName("entry"));
        // parse and print <relation></relation> elements
        showRelations(root.getElementsByTagName("relation"));
    }

    /**
     * Rebuilds {@link #idToElement} from the given {@code <entry>} elements.
     * Entries whose {@code id} is not an integer are skipped with a warning.
     *
     * @param entries the {@code <entry>} elements of the document
     */
    private void parseEntry(NodeList entries) {
        idToElement = new HashMap<Integer, Element>();
        int length = entries.getLength();
        for (int i = 0; i < length; i++) {
            Element entry = (Element) entries.item(i);
            String rawId = entry.getAttribute("id");
            Integer id = parseId(rawId);
            if (id == null) {
                System.err.println("Warning: skipping <entry> with non-numeric id \"" + rawId + "\"");
                continue;
            }
            idToElement.put(id, entry);
        }
    }

    /**
     * Prints every relation whose {@code entry1} and {@code entry2} both refer
     * to known entries; relations with a dangling reference are skipped.
     *
     * @param relations the {@code <relation>} elements of the document
     */
    private void showRelations(NodeList relations) {
        int length = relations.getLength();
        for (int i = 0; i < length; i++) {
            Element relation = (Element) relations.item(i);
            Element e1 = lookupEntry(relation, "entry1");
            Element e2 = lookupEntry(relation, "entry2");
            if (e1 == null || e2 == null) {
                continue; // lookupEntry already reported the problem
            }
            showSubRelations(e1, e2, relation.getElementsByTagName("subtype"));
        }
    }

    /**
     * Prints the relation between two entries, first expanding either side
     * into its components when it is a complex.
     *
     * @param e1 source entry
     * @param e2 target entry
     * @param subtypes the {@code <subtype>} elements of the relation (may be empty)
     */
    private void showSubRelations(Element e1, Element e2, NodeList subtypes) {
        NodeList componentsForE1 = e1.getElementsByTagName("component");
        if (componentsForE1.getLength() != 0) {
            // e1 is a complex: recurse once per member; the recursive call
            // takes care of expanding e2 if that is a complex as well.
            for (Element subE1 : resolveComponents(componentsForE1)) {
                showSubRelations(subE1, e2, subtypes);
            }
            return;
        }

        NodeList componentsForE2 = e2.getElementsByTagName("component");
        if (componentsForE2.getLength() != 0) {
            // e2 is a complex
            for (Element subE2 : resolveComponents(componentsForE2)) {
                showSubRelations(e1, subE2, subtypes);
            }
            return;
        }

        // Neither side has components: print the cross product of the names.
        List<String> e1names = splitNames(e1.getAttribute("name"));
        List<String> e2names = splitNames(e2.getAttribute("name"));
        String subtypeLabel = joinSubtypeNames(subtypes);
        for (String e1name : e1names) {
            for (String e2name : e2names) {
                System.out.println(e1name + " -> " + e2name + " (" + subtypeLabel + ")");
            }
        }
    }

    /**
     * Returns the entries referred to by the given {@code <component>}
     * elements, in document order, leaving out unresolvable references.
     */
    private List<Element> resolveComponents(NodeList components) {
        List<Element> members = new ArrayList<Element>();
        int length = components.getLength();
        for (int i = 0; i < length; i++) {
            Element member = lookupEntry((Element) components.item(i), "id");
            if (member != null) {
                members.add(member);
            }
        }
        return members;
    }

    /**
     * Looks up the entry whose id is stored in {@code attribute} of
     * {@code holder}.
     *
     * @return the entry, or {@code null} (after printing a warning to standard
     *         error) when the attribute is not an integer or matches no entry
     */
    private Element lookupEntry(Element holder, String attribute) {
        String raw = holder.getAttribute(attribute);
        Integer id = parseId(raw);
        Element entry = (id == null) ? null : idToElement.get(id);
        if (entry == null) {
            System.err.println("Warning: <" + holder.getTagName() + "> " + attribute + "=\"" + raw
                    + "\" does not refer to a known <entry>; skipped");
        }
        return entry;
    }

    /** Parses an id attribute value; returns {@code null} when it is not an integer. */
    private static Integer parseId(String raw) {
        try {
            return Integer.valueOf(raw.trim());
        } catch (NumberFormatException e) {
            return null;
        }
    }

    /** Splits a {@code name} attribute on runs of whitespace, dropping empty tokens. */
    private static List<String> splitNames(String names) {
        List<String> result = new ArrayList<String>();
        for (String token : names.trim().split("\\s+")) {
            if (token.length() != 0) {
                result.add(token);
            }
        }
        return result;
    }

    /**
     * Joins the {@code name} attributes of the given {@code <subtype>}
     * elements with commas; returns an empty string when there are none.
     */
    private static String joinSubtypeNames(NodeList subtypes) {
        StringBuilder joined = new StringBuilder();
        int length = subtypes.getLength();
        for (int i = 0; i < length; i++) {
            if (i > 0) {
                joined.append(',');
            }
            joined.append(((Element) subtypes.item(i)).getAttribute("name"));
        }
        return joined.toString();
    }
}