RELAX NG Schema for OTMI

From OpenTextMining

Jump to: navigation, search

Return to OTMI Specification

This annex defines a RELAX NG schema specification for OTMI.

<?xml version="1.0" encoding="UTF-8"?>
<grammar xmlns="http://relaxng.org/ns/structure/1.0"
         xmlns:a="http://relaxng.org/ns/compatibility/annotations/1.0"
         xmlns:otmi="http://www.nature.com/schema/2006/03/otmi"
         datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
  <!-- Entry point of the grammar: validation begins at the "data" pattern.
       The "a" prefix is used for documentation annotations, the "otmi"
       prefix for the element names being described, and simple types
       are taken from the XML Schema datatype library. -->
  <start>
    <ref name="data"/>
  </start>
  <define name="data">
    <a:documentation>The "data" element that is added to an Atom Entry document</a:documentation>
    <element name="otmi:data">
      <!-- Required "version" attribute, typed as an NMTOKEN. -->
      <attribute name="version">
        <data type="NMTOKEN"/>
      </attribute>
      <!-- Child content, in this fixed order: an optional stoplist, the
           mandatory sections, then optional floats and optional references. -->
      <optional>
        <ref name="stoplist"/>
      </optional>
      <ref name="sections"/>
      <optional>
        <ref name="floats"/>
      </optional>
      <optional>
        <ref name="references"/>
      </optional>
    </element>
  </define>
  <define name="stoplist">
    <a:documentation>Stoplist is an optional element that references, by URI,
the stoplist document used</a:documentation>
    <element name="otmi:stoplist">
      <!-- The URI is carried by the required "href" attribute; the element
           itself has no content. -->
      <attribute name="href">
        <data type="anyURI"/>
      </attribute>
      <empty/>
    </element>
  </define>
  <define name="sections">
    <a:documentation>Sections - a sequence of one or more "section" elements</a:documentation>
    <!-- No wrapper element is defined here: the pattern expands directly
         to the repeated "section" pattern. -->
    <oneOrMore>
      <ref name="section"/>
    </oneOrMore>
  </define>
  <define name="section">
    <a:documentation>A section is either a front section ("abstract" | "standfirst")
holding otmi-text content, or else a body section ("body") holding child
sections ("firstpara" | "methods" | "conclusions" | "others"),
which in turn hold otmi-text content</a:documentation>
    <element name="otmi:section">
      <!-- The section names listed in the documentation are not enforced
           by this pattern: any NCName is accepted as the "name" value. -->
      <attribute name="name">
        <data type="NCName"/>
      </attribute>
      <!-- Content is one of two alternatives: otmi-text, or one or more
           nested sections (the pattern refers to itself, so nesting depth
           is not limited by the grammar). -->
      <choice>
        <ref name="otmi-text"/>
        <oneOrMore>
          <ref name="section"/>
        </oneOrMore>
      </choice>
    </element>
  </define>
  <define name="floats">
    <a:documentation>Floats - for now just figures and tables are included
(other floating objects could be added)</a:documentation>
    <!-- The choice is repeated so that a single document can carry both
         figures and tables, in any order. A bare choice would admit only
         one of the two kinds per document. Content that consists solely
         of figures or solely of tables still validates as before. -->
    <oneOrMore>
      <choice>
        <ref name="figures"/>
        <ref name="tables"/>
      </choice>
    </oneOrMore>
  </define>
  <define name="figures">
    <a:documentation>Figures - one or more "figure" elements, each holding
a figure title followed by a caption</a:documentation>
    <oneOrMore>
      <element name="otmi:figure">
        <!-- Both children are required, with the title first. -->
        <ref name="title"/>
        <ref name="caption"/>
      </element>
    </oneOrMore>
  </define>
  <define name="tables">
    <a:documentation>Tables - one or more "table" elements, each holding
a table title</a:documentation>
    <oneOrMore>
      <element name="otmi:table">
        <!-- A table carries a title only; no caption is defined for it. -->
        <ref name="title"/>
      </element>
    </oneOrMore>
  </define>
  <define name="title">
    <!-- The "title" element, referenced by the figure and table patterns;
         its content is the shared otmi-text pattern. -->
    <element name="otmi:title">
      <ref name="otmi-text"/>
    </element>
  </define>
  <define name="caption">
    <!-- The "caption" element, referenced by the figure pattern; its
         content is the shared otmi-text pattern. -->
    <element name="otmi:caption">
      <ref name="otmi-text"/>
    </element>
  </define>
  <define name="references">
    <a:documentation>References - no reference text is provided, only URI references</a:documentation>
    <element name="otmi:references">
      <!-- NOTE(review): at least one ref-id is required and refs-noid is
           mandatory, so a reference list in which no entry has a URI
           cannot be expressed - confirm that this is intended. -->
      <oneOrMore>
        <element name="otmi:ref-id">
          <a:documentation>The "ref-id" element references a document by URI</a:documentation>
          <data type="anyURI"/>
        </element>
      </oneOrMore>
      <!-- NOTE(review): the "integer" type also accepts negative values;
           a count presumably should not be negative - confirm. -->
      <element name="otmi:refs-noid">
        <a:documentation>The "refs-noid" element provides the count of references
that have no URI</a:documentation>
        <data type="integer"/>
      </element>
    </element>
  </define>
  <define name="otmi-text">
    <a:documentation>OTMI Text - the actual payload of an OTMI file</a:documentation>
    <!-- Each of the three parts is optional, so this pattern also matches
         empty content. The parts that are present must appear in the
         order vectors, snippets, full-text. -->
    <optional>
      <ref name="vectors"/>
    </optional>
    <optional>
      <ref name="snippets"/>
    </optional>
    <optional>
      <ref name="full-text"/>
    </optional>
  </define>
  <define name="vectors">
    <a:documentation>Vectors - a table that lists word vectors</a:documentation>
    <element name="otmi:vectors">
      <!-- Required integer "number" attribute (presumably the number of
           vector entries - TODO confirm against the specification text). -->
      <attribute name="number">
        <data type="integer"/>
      </attribute>
      <!-- The split expression comes first, followed by the entries. -->
      <ref name="split-regex"/>
      <oneOrMore>
        <element name="otmi:vector">
          <!-- Each entry has a required integer "count" attribute and
               free text content. -->
          <attribute name="count">
            <data type="integer"/>
          </attribute>
          <text/>
        </element>
      </oneOrMore>
    </element>
  </define>
  <define name="snippets">
    <a:documentation>Snippets - a table that lists text snippets</a:documentation>
    <element name="otmi:snippets">
      <!-- Required integer "number" attribute (presumably the number of
           snippet entries - TODO confirm against the specification text). -->
      <attribute name="number">
        <data type="integer"/>
      </attribute>
      <!-- The split expression comes first, followed by the entries. -->
      <ref name="split-regex"/>
      <oneOrMore>
        <element name="otmi:snippet">
          <!-- Each entry is free text and has no attributes. -->
          <text/>
        </element>
      </oneOrMore>
    </element>
  </define>
  <define name="split-regex">
    <a:documentation>The "split-regex" element holds the expression used to split text</a:documentation>
    <!-- The content is plain text; the grammar does not check that it is
         a well-formed regular expression. -->
    <element name="otmi:split-regex">
      <text/>
    </element>
  </define>
  <define name="full-text">
    <a:documentation>Full text is given either with stopwords removed or without</a:documentation>
    <!-- Exactly one of the two forms may appear; there is no wrapper
         element around the chosen form. -->
    <choice>
      <ref name="reduced-text"/>
      <ref name="raw-text"/>
    </choice>
  </define>
  <define name="reduced-text">
    <a:documentation>The "reduced-text" element provides arbitrary text that has been
cleaned of markup and has had stopwords removed</a:documentation>
    <!-- The grammar only requires text content; the cleaning described
         above is not something this pattern can verify. -->
    <element name="otmi:reduced-text">
      <text/>
    </element>
  </define>
  <define name="raw-text">
    <a:documentation>The "raw-text" element provides arbitrary text cleaned of markup
but without stopwords removed</a:documentation>
    <!-- The grammar only requires text content; the cleaning described
         above is not something this pattern can verify. -->
    <element name="otmi:raw-text">
      <text/>
    </element>
  </define>
</grammar>
Personal tools