W3C XML Schema for OTMI
From OpenTextMining
Return to OTMI Specification
This annex defines the W3C XML Schema (XSD) for OTMI.
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified" targetNamespace="http://www.nature.com/schema/2006/03/otmi" xmlns:otmi="http://www.nature.com/schema/2006/03/otmi">
<!-- data: in order, an optional stoplist, the sections group, an optional
     floats group and an optional references element. The version attribute
     (an NMTOKEN) is mandatory. -->
<xs:element name="data">
  <xs:complexType>
    <xs:sequence>
      <xs:element ref="otmi:stoplist" minOccurs="0"/>
      <xs:group ref="otmi:sections"/>
      <xs:group ref="otmi:floats" minOccurs="0"/>
      <xs:element ref="otmi:references" minOccurs="0"/>
    </xs:sequence>
    <xs:attribute name="version" type="xs:NMTOKEN" use="required"/>
  </xs:complexType>
</xs:element>
<!-- stoplist: an element with no content; its required href attribute is a
     URI. NOTE(review): presumably the location of the stop-word list;
     confirm against the OTMI specification. -->
<xs:element name="stoplist">
  <xs:complexType>
    <xs:attribute name="href" type="xs:anyURI" use="required"/>
  </xs:complexType>
</xs:element>
<!-- sections: a run of one or more section elements. -->
<xs:group name="sections">
  <xs:annotation>
    <xs:documentation>Sections </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element ref="otmi:section" maxOccurs="unbounded"/>
  </xs:sequence>
</xs:group>
<!-- section: holds EITHER the otmi-text group OR one or more nested section
     elements, never both. The name attribute (an NCName) is mandatory. -->
<xs:element name="section">
  <xs:complexType>
    <xs:choice>
      <xs:group ref="otmi:otmi-text"/>
      <xs:element ref="otmi:section" maxOccurs="unbounded"/>
    </xs:choice>
    <xs:attribute name="name" type="xs:NCName" use="required"/>
  </xs:complexType>
</xs:element>
<!-- floats: any non-empty mix of figures runs and tables runs.
     A single-occurrence choice between the two groups would let a document
     carry figures or tables but never both. The repeatable choice below
     accepts everything that single choice accepts, plus documents that
     contain both kinds of float.
     The choice is wrapped in a sequence because XSD 1.0 does not allow
     minOccurs/maxOccurs on the model group that is the direct child of a
     named xs:group definition. -->
<xs:group name="floats">
  <xs:sequence>
    <xs:choice maxOccurs="unbounded">
      <xs:group ref="otmi:figures"/>
      <xs:group ref="otmi:tables"/>
    </xs:choice>
  </xs:sequence>
</xs:group>
<!-- figures: a run of one or more figure elements. -->
<xs:group name="figures">
  <xs:annotation>
    <xs:documentation>Figures element includes figure titles and captions</xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element ref="otmi:figure" maxOccurs="unbounded"/>
  </xs:sequence>
</xs:group>
<!-- figure: extends the complex type named "title" (not the element of the
     same name) by appending one caption element, so the content is a title
     followed by a caption. -->
<xs:element name="figure">
  <xs:complexType>
    <xs:complexContent>
      <xs:extension base="otmi:title">
        <xs:sequence>
          <xs:element ref="otmi:caption"/>
        </xs:sequence>
      </xs:extension>
    </xs:complexContent>
  </xs:complexType>
</xs:element>
<!-- tables: a run of one or more table elements. -->
<xs:group name="tables">
  <xs:annotation>
    <xs:documentation>Tables element includes table titles</xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element ref="otmi:table" maxOccurs="unbounded"/>
  </xs:sequence>
</xs:group>
<!-- table: typed by the complex type named "title", i.e. its content is a
     single title element. -->
<xs:element type="otmi:title" name="table"/>
<!-- Complex type "title": exactly one title element. Types and elements live
     in separate symbol spaces, so sharing the name with the title element
     is legal. -->
<xs:complexType name="title">
  <xs:sequence>
    <xs:element ref="otmi:title"/>
  </xs:sequence>
</xs:complexType>
<!-- Element "title": its content is the otmi-text group. -->
<xs:element name="title">
  <xs:complexType>
    <xs:group ref="otmi:otmi-text"/>
  </xs:complexType>
</xs:element>
<!-- caption: its content is the otmi-text group. -->
<xs:element name="caption">
  <xs:complexType>
    <xs:group ref="otmi:otmi-text"/>
  </xs:complexType>
</xs:element>
<!-- references: zero or more ref-id URIs followed by exactly one refs-noid
     integer.
     ref-id carries minOccurs="0" so that a reference list in which no entry
     has a URI can still be expressed. With the default minOccurs of 1 such
     a list could not be validated at all. Every instance that was valid
     with a mandatory ref-id is still valid. -->
<xs:element name="references">
  <xs:complexType>
    <xs:sequence>
      <xs:element ref="otmi:ref-id" minOccurs="0" maxOccurs="unbounded"/>
      <xs:element ref="otmi:refs-noid"/>
    </xs:sequence>
  </xs:complexType>
</xs:element>
<!-- ref-id: a single URI value. -->
<xs:element type="xs:anyURI" name="ref-id">
  <xs:annotation>
    <xs:documentation>The "ref-id" element references documents by URI</xs:documentation>
  </xs:annotation>
</xs:element>
<!-- refs-noid: an integer value. NOTE(review): presumably the number of
     references that have no URI; if so, xs:nonNegativeInteger would be a
     tighter type than xs:integer. Confirm before changing. -->
<xs:element type="xs:integer" name="refs-noid"/>
<!-- otmi-text: vectors, snippets and full-text, in that order, each of them
     optional. Because every member is optional the group may match nothing.
     full-text is an abstract substitution-group head, so instances use one
     of its substitutes in that position. -->
<xs:group name="otmi-text">
  <xs:annotation>
    <xs:documentation>OTMI Text - This is the actual payload for an OTMI file</xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element ref="otmi:vectors" minOccurs="0"/>
    <xs:element ref="otmi:snippets" minOccurs="0"/>
    <xs:element ref="otmi:full-text" minOccurs="0"/>
  </xs:sequence>
</xs:group>
<!-- vectors: one split-regex element followed by one or more vector
     elements. The integer number attribute is mandatory.
     NOTE(review): number presumably records how many vector children
     follow; the schema does not enforce that. Confirm. -->
<xs:element name="vectors">
  <xs:complexType>
    <xs:sequence>
      <xs:element ref="otmi:split-regex"/>
      <xs:element ref="otmi:vector" maxOccurs="unbounded"/>
    </xs:sequence>
    <xs:attribute name="number" type="xs:integer" use="required"/>
  </xs:complexType>
</xs:element>
<!-- vector: character data plus a mandatory integer count attribute.
     The content is declared as simple content extending xs:string instead
     of an empty complex type flagged mixed="true". Both forms accept the
     same instances (text only, no child elements), but this one gives the
     content an explicit string type for schema-aware tools. -->
<xs:element name="vector">
  <xs:complexType>
    <xs:simpleContent>
      <xs:extension base="xs:string">
        <xs:attribute name="count" type="xs:integer" use="required"/>
      </xs:extension>
    </xs:simpleContent>
  </xs:complexType>
</xs:element>
<!-- snippets: one split-regex element followed by one or more snippet
     elements. The integer number attribute is mandatory.
     NOTE(review): number presumably records how many snippet children
     follow; the schema does not enforce that. Confirm. -->
<xs:element name="snippets">
  <xs:complexType>
    <xs:sequence>
      <xs:element ref="otmi:split-regex"/>
      <xs:element ref="otmi:snippet" maxOccurs="unbounded"/>
    </xs:sequence>
    <xs:attribute name="number" type="xs:integer" use="required"/>
  </xs:complexType>
</xs:element>
<!-- snippet: a plain string value. -->
<xs:element type="xs:string" name="snippet"/>
<!-- split-regex: a plain string value; it is the first child of both
     vectors and snippets. NOTE(review): presumably the regular expression
     used to split the text; the schema only checks that it is a string. -->
<xs:element type="xs:string" name="split-regex"/>
<!-- full-text: abstract string-typed head of a substitution group. Being
     abstract, it cannot appear in an instance itself; an element declared
     with substitutionGroup="otmi:full-text" must be used in its place. -->
<xs:element name="full-text" type="xs:string" abstract="true">
  <xs:annotation>
    <xs:documentation>Full text is either with stopwords removed or without </xs:documentation>
  </xs:annotation>
</xs:element>
<!-- reduced-text and raw-text: the two substitutes for full-text. Neither
     declares a type of its own, so under the XSD element-declaration rules
     each takes the type of its substitution-group head, xs:string. -->
<xs:element substitutionGroup="otmi:full-text" name="reduced-text"/>
<xs:element substitutionGroup="otmi:full-text" name="raw-text"/>
</xs:schema>
