| 1 | |
| 2 | = Day-1 = |
| 3 | |
| 4 | = Day0 = |
| 5 | |
| 6 | = Day1 = |
| 7 | |
| 8 | = Day2 = |
| 9 | == Designing a mechanism to output objects using the ERB template engine for writing data in RDF == |
| 10 | |
| 11 | * Mitsuteru Nakao |
| 12 | * Naohisa Goto |
| 13 | * Toshiaki Katayama |
| 14 | * Raoul Jean Pierre Bonnal |
| 15 | |
| 16 | === Basic idea === |
| 17 | {{{ |
| 18 | medline = Bio::MEDLINE.new(medline_flatfile) |
| 19 | puts medline.output_rdf # => print a medline abstract in RDF format. |
| 20 | }}} |
| 21 | * Writing the data of a Bio::DB object in RDF format |
| 22 | * Using the ERB template system |
| 23 | * Template replaceability |
| 24 | |
| 25 | |
| 26 | === Proposed architecture === |
| 27 | |
| 28 | Adding a module Bio::OutputErb |
| 29 | {{{ |
| 30 | module Bio |
| 31 | module OutputErb |
| 32 | require 'erb' |
| 33 | def def_output_method_erb(m, t) |
| 34 | erb = ERB.new(File.read(t)) |
| 35 | erb.def_method(self, m, t) |
| 36 | end |
| 37 | end |
| 38 | end |
| 39 | }}} |
| 40 | |
| 41 | Extending the Bio::MEDLINE |
| 42 | {{{ |
| 43 | class MEDLINE |
| 44 | extend OutputErb |
| 45 | def_output_method_erb("output_ttl", 'bio/bio/db/medline/medline.ttl.erb') |
| 46 | |
| 47 | def output(t) |
| 48 | send("output_#{t.to_s}") |
| 49 | end |
| 50 | end |
| 51 | }}} |
| 52 | |
| 53 | An ERB template file, medline.ttl.erb, for RDF/Turtle (partial) |
| 54 | {{{ |
| 55 | <% |
| 56 | require 'date' |
| 57 | # http://www.nlm.nih.gov/bsd/mms/medlineelements.html |
| 58 | # A generic RDF subject URI at the TogoWS REST |
| 59 | @prefix = "http://togows.dbcls.jp/entry/ncbi-pubmed" |
| 60 | |
| 61 | def uri |
| 62 | "<#{@prefix}/#{c(pubmed['PMID'])}>" |
| 63 | end |
| 64 | |
| 65 | # Generate a generic RDF predicate URI at the TogoWS REST. |
| 66 | def predicate(field_name) |
| 67 | "<http://togows.dbcls.jp/nezu/1.0/ncbi-pubmed##{field_name}>" |
| 68 | end |
| 69 | |
| 70 | def ndate(str) |
| 71 | str.strip |
| 72 | case str |
| 73 | when /^\d+$/ |
| 74 | str.gsub(/(\d{4})(\d{2})(\d{2})/) { "#{$1}-#{$2}-#{$3}"} |
| 75 | else |
| 76 | str.gsub("/", '-') |
| 77 | end |
| 78 | end |
| 79 | |
| 80 | def ndatetime(str) |
| 81 | d,t = str.split(" ") |
| 82 | [ndate(d), t].join(" ") |
| 83 | end |
| 84 | %><%= uri %> <%= predicate('pmid') %> "<%=c pubmed['PMID'] %>" . |
| 85 | <%= uri %> <http://www.w3.org/2000/01/rdf-schema#label> "pmid:<%=c pubmed['PMID'] %>" . |
| 86 | <%= uri %> <http://purl.org/dc/elements/1.1/title> "pmid:<%=c pubmed['PMID'] %>" . |
| 87 | <%= uri %> <http://purl.org/dc/elements/1.1/identifier> <http://pubmed.org/<%=c pubmed['PMID'] %>> . |
| 88 | <%= uri %> <%= predicate('own') %> "<%=c pubmed['OWN'] %>" . |
| 89 | <%= uri %> <%= predicate('stat') %> "<%=c pubmed['STAT'] %>" . |
| 90 | <%= uri %> <%= predicate('da') %> "<%= ndate(c pubmed['DA']) %>" . |
| 91 | <%= uri %> <%= predicate('dcom') %> "<%=ndate(c pubmed['DCOM']) %>" . |
| 92 | <%= uri %> <%= predicate('lr') %> "<%=ndate(c pubmed['LR']) %>" . |
| 93 | <% pubmed['IS'].scan(/(\d+-\d+ \(\S+\))/).flatten.each do |is| %> |
| 94 | <%= uri %> <%= predicate('is') %> "<%=c "#{is}" %>" . |
| 95 | <% end %> |
| 96 | <%= uri %> <%= predicate('vi') %> "<%=c pubmed['VI'] %>" . |
| 97 | <%= uri %> <http://prismstandard.org/namespaces/2.0/basic/volume> "<%=c pubmed['VI'] %>" . |
| 98 | <%= uri %> <%= predicate('dp') %> "<%=c pubmed['DP'] %>" . |
| 99 | ... |
| 100 | }}} |
| 101 | |
| 102 | Usage |
| 103 | {{{ |
| 104 | medline = Bio::MEDLINE.new(medline_flatfile) |
| 105 | |
| 106 | medline.output_ttl # .ttl == RDF/Turtle |
| 107 | medline.output(:ttl) # alias |
| 108 | medline.to_ttl # alias |
| 109 | medline.to(:ttl) # alias |
| 110 | }}} |
| 111 | |
| 112 | Using a user-supplied template |
| 113 | {{{ |
| 114 | class MEDLINE |
| 115 | def_output_method_erb("output_rdfxml", 'bio/bio/db/medline/medline.rdfxml.erb') |
| 116 | end |
| 117 | |
| 118 | medline = Bio::MEDLINE.new(medline_flatfile) |
| 119 | medline.output_rdfxml |
| 120 | }}} |
| 121 | |
| 122 | File arrangement |
| 123 | * lib/bio/db/ |
| 124 | * medline.rb |
| 125 | * medline/medline.ttl.erb |
| 126 | |
| 127 | === Issues === |
| 128 | Naming issue |
| 129 | * Choice: Bio::Renderer / Bio::Render / Bio::Template / Bio::Output / Bio::Export / Bio::Exporter / Bio::Writer / Bio::OutputErb |
| 130 | * Choice: medline.output_ttl / medline.output(:ttl) / medline.to_ttl / medline.to(:ttl) |
| 131 | * Pros: the to_ttl naming is easy for beginners. |
| 132 | * Cons: a to_ttl-style name conventionally denotes conversion to another class; for example, the to_s method converts an object to its String representation. A name such as to_json may also be confusing. |
| 133 | Method namespace |
| 134 | * Functions defined in the template file contaminate the namespace of the Bio::MEDLINE class. |