| | 96 | |
| | 97 | When the XML source is too large to fit in the memory of xsltproc, I (Pierre Lindenbaum) use a custom tool named '''xsltstream''' that calls a new XSLT transformation for every chunk of data. For example, say you want to convert the XML files of dbSNP ( [ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/XML/] e.g. ds_ch1.xml.gz is 1099375 KB ) with dbsnp2rdf.xsl ( http://code.google.com/p/lindenb/source/browse/trunk/src/xsl/dbsnp2rdf.xsl ). Download '''xsltstream''' from http://code.google.com/p/lindenb/downloads/list |
| | 98 | And then invoke: |
| | 99 | {{{ |
| | 100 | java -jar xsltstream.jar -x dbsnp2rdf.xsl -q "Rs" 'ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/XML/ds_ch1.xml.gz' |\ |
| | 101 | grep -v "rdf:RDF" |\ |
| | 102 | grep -v "<?xml" |
| | 103 | }}} |
| | 104 | Result: |
| | 105 | {{{ |
| | 106 | (...) |
| | 107 | <o:SNP rdf:about="http://www.ncbi.nlm.nih.gov/snp/2854"> |
| | 108 | <dc:title>rs2854</dc:title> |
| | 109 | <o:taxon rdf:resource="http://www.ncbi.nlm.nih.gov/taxonomy/9606"/> |
| | 110 | <o:het rdf:datatype="http://www.w3.org/2001/XMLSchema#float">0.24</o:het> |
| | 111 | <o:hasMapping> |
| | 112 | <o:Mapping> |
| | 113 | <o:build rdf:resource="urn:void:ncbi:build:Celera/36_3"/> |
| | 114 | <o:chrom rdf:resource="urn:void:ncbi:chromosome:9606/chr1"/> |
| | 115 | <o:start rdf:datatype="http://www.w3.org/2001/XMLSchema#int">196613685</o:start> |
| | 116 | <o:end rdf:datatype="http://www.w3.org/2001/XMLSchema#int">196613686</o:end> |
| | 117 | <o:orient>+</o:orient> |
| | 118 | </o:Mapping> |
| | 119 | </o:hasMapping> |
| | 120 | <o:hasMapping> |
| | 121 | <o:Mapping> |
| | 122 | <o:build rdf:resource="urn:void:ncbi:build:HuRef/36_3"/> |
| | 123 | <o:chrom rdf:resource="urn:void:ncbi:chromosome:9606/chr1"/> |
| | 124 | <o:start rdf:datatype="http://www.w3.org/2001/XMLSchema#int">194069483</o:start> |
| | 125 | <o:end rdf:datatype="http://www.w3.org/2001/XMLSchema#int">194069484</o:end> |
| | 126 | <o:orient>+</o:orient> |
| | 127 | </o:Mapping> |
| | 128 | </o:hasMapping> |
| | 129 | <o:hasMapping> |
| | 130 | <o:Mapping> |
| | 131 | <o:build rdf:resource="urn:void:ncbi:build:reference/36_3"/> |
| | 132 | <o:chrom rdf:resource="urn:void:ncbi:chromosome:9606/chr1"/> |
| | 133 | <o:start rdf:datatype="http://www.w3.org/2001/XMLSchema#int">221460932</o:start> |
| | 134 | <o:end rdf:datatype="http://www.w3.org/2001/XMLSchema#int">221460933</o:end> |
| | 135 | <o:orient>+</o:orient> |
| | 136 | </o:Mapping> |
| | 137 | </o:hasMapping> |
| | 138 | </o:SNP> |
| | 139 | <o:SNP rdf:about="http://www.ncbi.nlm.nih.gov/snp/2866"> |
| | 140 | <dc:title>rs2866</dc:title> |
| | 141 | <o:taxon rdf:resource="http://www.ncbi.nlm.nih.gov/taxonomy/9606"/> |
| | 142 | <o:het rdf:datatype="http://www.w3.org/2001/XMLSchema#float">0.50</o:het> |
| | 143 | <o:hasMapping> |
| | 144 | <o:Mapping> |
| | 145 | <o:build rdf:resource="urn:void:ncbi:build:Celera/36_3"/> |
| | 146 | <o:chrom rdf:resource="urn:void:ncbi:chromosome:9606/chr1"/> |
| | 147 | <o:start rdf:datatype="http://www.w3.org/2001/XMLSchema#int">220636770</o:start> |
| | 148 | <o:end rdf:datatype="http://www.w3.org/2001/XMLSchema#int">220636771</o:end> |
| | 149 | <o:orient>-</o:orient> |
| | 150 | </o:Mapping> |
| | 151 | </o:hasMapping> |
| | 152 | <o:hasMapping> |
| | 153 | <o:Mapping> |
| | 154 | <o:build rdf:resource="urn:void:ncbi:build:HuRef/36_3"/> |
| | 155 | <o:chrom rdf:resource="urn:void:ncbi:chromosome:9606/chr1"/> |
| | 156 | <o:start rdf:datatype="http://www.w3.org/2001/XMLSchema#int">217734218</o:start> |
| | 157 | <o:end rdf:datatype="http://www.w3.org/2001/XMLSchema#int">217734219</o:end> |
| | 158 | <o:orient>-</o:orient> |
| | 159 | </o:Mapping> |
| | 160 | </o:hasMapping> |
| | 161 | <o:hasMapping> |
| | 162 | <o:Mapping> |
| | 163 | <o:build rdf:resource="urn:void:ncbi:build:reference/36_3"/> |
| | 164 | <o:chrom rdf:resource="urn:void:ncbi:chromosome:9606/chr1"/> |
| | 165 | <o:start rdf:datatype="http://www.w3.org/2001/XMLSchema#int">245407000</o:start> |
| | 166 | <o:end rdf:datatype="http://www.w3.org/2001/XMLSchema#int">245407001</o:end> |
| | 167 | <o:orient>-</o:orient> |
| | 168 | </o:Mapping> |
| | 169 | </o:hasMapping> |
| | 170 | </o:SNP> |
| | 171 | (...) |
| | 172 | }}} |
| | 173 | |
| | 174 | |