| 96 | |
| 97 | When the XML source is too large to fit in the memory of xsltproc, I (Pierre Lindenbaum) use a custom tool named '''xsltstream''' that runs a new XSLT transformation for every chunk of data. For example, say you want to convert the XML files of dbSNP ( [ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/XML/] e.g. ds_ch1.xml.gz is 1099375 KB ) with dbsnp2rdf.xsl ( http://code.google.com/p/lindenb/source/browse/trunk/src/xsl/dbsnp2rdf.xsl ). Download '''xsltstream''' from http://code.google.com/p/lindenb/downloads/list |
| 98 | And then invoke: |
| 99 | {{{ |
| 100 | java -jar xsltstream.jar -x dbsnp2rdf.xsl -q "Rs" 'ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/XML/ds_ch1.xml.gz' |\ |
| 101 | grep -v "rdf:RDF" |\ |
| 102 | grep -v "<?xml" |
| 103 | }}} |
| 104 | Result: |
| 105 | {{{ |
| 106 | (...) |
| 107 | <o:SNP rdf:about="http://www.ncbi.nlm.nih.gov/snp/2854"> |
| 108 | <dc:title>rs2854</dc:title> |
| 109 | <o:taxon rdf:resource="http://www.ncbi.nlm.nih.gov/taxonomy/9606"/> |
| 110 | <o:het rdf:datatype="http://www.w3.org/2001/XMLSchema#float">0.24</o:het> |
| 111 | <o:hasMapping> |
| 112 | <o:Mapping> |
| 113 | <o:build rdf:resource="urn:void:ncbi:build:Celera/36_3"/> |
| 114 | <o:chrom rdf:resource="urn:void:ncbi:chromosome:9606/chr1"/> |
| 115 | <o:start rdf:datatype="http://www.w3.org/2001/XMLSchema#int">196613685</o:start> |
| 116 | <o:end rdf:datatype="http://www.w3.org/2001/XMLSchema#int">196613686</o:end> |
| 117 | <o:orient>+</o:orient> |
| 118 | </o:Mapping> |
| 119 | </o:hasMapping> |
| 120 | <o:hasMapping> |
| 121 | <o:Mapping> |
| 122 | <o:build rdf:resource="urn:void:ncbi:build:HuRef/36_3"/> |
| 123 | <o:chrom rdf:resource="urn:void:ncbi:chromosome:9606/chr1"/> |
| 124 | <o:start rdf:datatype="http://www.w3.org/2001/XMLSchema#int">194069483</o:start> |
| 125 | <o:end rdf:datatype="http://www.w3.org/2001/XMLSchema#int">194069484</o:end> |
| 126 | <o:orient>+</o:orient> |
| 127 | </o:Mapping> |
| 128 | </o:hasMapping> |
| 129 | <o:hasMapping> |
| 130 | <o:Mapping> |
| 131 | <o:build rdf:resource="urn:void:ncbi:build:reference/36_3"/> |
| 132 | <o:chrom rdf:resource="urn:void:ncbi:chromosome:9606/chr1"/> |
| 133 | <o:start rdf:datatype="http://www.w3.org/2001/XMLSchema#int">221460932</o:start> |
| 134 | <o:end rdf:datatype="http://www.w3.org/2001/XMLSchema#int">221460933</o:end> |
| 135 | <o:orient>+</o:orient> |
| 136 | </o:Mapping> |
| 137 | </o:hasMapping> |
| 138 | </o:SNP> |
| 139 | <o:SNP rdf:about="http://www.ncbi.nlm.nih.gov/snp/2866"> |
| 140 | <dc:title>rs2866</dc:title> |
| 141 | <o:taxon rdf:resource="http://www.ncbi.nlm.nih.gov/taxonomy/9606"/> |
| 142 | <o:het rdf:datatype="http://www.w3.org/2001/XMLSchema#float">0.50</o:het> |
| 143 | <o:hasMapping> |
| 144 | <o:Mapping> |
| 145 | <o:build rdf:resource="urn:void:ncbi:build:Celera/36_3"/> |
| 146 | <o:chrom rdf:resource="urn:void:ncbi:chromosome:9606/chr1"/> |
| 147 | <o:start rdf:datatype="http://www.w3.org/2001/XMLSchema#int">220636770</o:start> |
| 148 | <o:end rdf:datatype="http://www.w3.org/2001/XMLSchema#int">220636771</o:end> |
| 149 | <o:orient>-</o:orient> |
| 150 | </o:Mapping> |
| 151 | </o:hasMapping> |
| 152 | <o:hasMapping> |
| 153 | <o:Mapping> |
| 154 | <o:build rdf:resource="urn:void:ncbi:build:HuRef/36_3"/> |
| 155 | <o:chrom rdf:resource="urn:void:ncbi:chromosome:9606/chr1"/> |
| 156 | <o:start rdf:datatype="http://www.w3.org/2001/XMLSchema#int">217734218</o:start> |
| 157 | <o:end rdf:datatype="http://www.w3.org/2001/XMLSchema#int">217734219</o:end> |
| 158 | <o:orient>-</o:orient> |
| 159 | </o:Mapping> |
| 160 | </o:hasMapping> |
| 161 | <o:hasMapping> |
| 162 | <o:Mapping> |
| 163 | <o:build rdf:resource="urn:void:ncbi:build:reference/36_3"/> |
| 164 | <o:chrom rdf:resource="urn:void:ncbi:chromosome:9606/chr1"/> |
| 165 | <o:start rdf:datatype="http://www.w3.org/2001/XMLSchema#int">245407000</o:start> |
| 166 | <o:end rdf:datatype="http://www.w3.org/2001/XMLSchema#int">245407001</o:end> |
| 167 | <o:orient>-</o:orient> |
| 168 | </o:Mapping> |
| 169 | </o:hasMapping> |
| 170 | </o:SNP> |
| 171 | (...) |
| 172 | }}} |
| 173 | |
| 174 | |