OpenConvert
<?xml version="1.0" encoding="UTF-8"?>
<cmd:CMD xmlns:cmd="http://www.clarin.eu/cmd/1"
xmlns:cmdp="http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1342181139640"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
CMDVersion="1.2"
xsi:schemaLocation="http://www.clarin.eu/cmd/1 https://infra.clarin.eu/CMDI/1.x/xsd/cmd-envelop.xsd http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1342181139640 https://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/1.1/profiles/clarin.eu:cr1:p_1342181139640/1.2/xsd">
<cmd:Header>
<cmd:MdCreator>rogierkraf</cmd:MdCreator>
<cmd:MdCreationDate>2013-11-30+02:00</cmd:MdCreationDate>
<cmd:MdProfile>clarin.eu:cr1:p_1342181139640</cmd:MdProfile>
<cmd:MdCollectionDisplayName>CLARIN Netherlands</cmd:MdCollectionDisplayName>
</cmd:Header>
<cmd:Resources>
<cmd:ResourceProxyList>
<cmd:ResourceProxy id="OCV001">
<cmd:ResourceType>Resource</cmd:ResourceType>
<cmd:ResourceRef>http://openconvert.clarin.inl.nl/</cmd:ResourceRef>
</cmd:ResourceProxy>
</cmd:ResourceProxyList>
<cmd:JournalFileProxyList/>
<cmd:ResourceRelationList/>
</cmd:Resources>
<cmd:Components>
<cmdp:ClarinSoftwareDescription>
<cmdp:GeneralInfo>
<cmdp:name xml:lang="eng">OpenConvert</cmdp:name>
<cmdp:title xml:lang="eng">OpenConvert</cmdp:title>
<cmdp:publicationYear>2015</cmdp:publicationYear>
<cmdp:url>http://openconvert.clarin.inl.nl</cmdp:url>
<cmdp:CLARINCentre>Dutch Language Institute</cmdp:CLARINCentre>
<cmdp:OriginalSource>http://portal.clarin.nl/node/4224</cmdp:OriginalSource>
<cmdp:ReleaseStatus>
<cmdp:LifeCycleStatus>released</cmdp:LifeCycleStatus>
<cmdp:lastUpdate>2015-10-07</cmdp:lastUpdate>
</cmdp:ReleaseStatus>
<cmdp:NationalProjects>
<cmdp:Project>
<cmdp:name>CLARIN-NL</cmdp:name>
<cmdp:title>CLARIN in the Netherlands</cmdp:title>
<cmdp:id>184.021.003</cmdp:id>
<cmdp:funder>NWO</cmdp:funder>
<cmdp:url>http://www.clarin.nl</cmdp:url>
<cmdp:Contact>
<cmdp:Person>Jan Odijk</cmdp:Person>
<cmdp:Role>National Coordinator</cmdp:Role>
<cmdp:Address>Utrecht, the Netherlands</cmdp:Address>
<cmdp:Email>j.odijk@uu.nl</cmdp:Email>
<cmdp:Department>UiL-OTS</cmdp:Department>
<cmdp:Organisation>Utrecht University</cmdp:Organisation>
</cmdp:Contact>
<cmdp:Duration>
<cmdp:StartYear>2009</cmdp:StartYear>
<cmdp:CompletionYear>2015</cmdp:CompletionYear>
</cmdp:Duration>
</cmdp:Project>
<cmdp:Project>
<cmdp:name>CLARIAH-CORE</cmdp:name>
<cmdp:title>Common Lab Research Infrastructure for the Arts and the Humanities</cmdp:title>
<cmdp:id>184.033.101</cmdp:id>
<cmdp:funder>NWO</cmdp:funder>
<cmdp:url>http://www.clariah.nl</cmdp:url>
<cmdp:Contact>
<cmdp:Person>Jan Odijk</cmdp:Person>
<cmdp:Role>National Coordinator</cmdp:Role>
<cmdp:Address>Utrecht, the Netherlands</cmdp:Address>
<cmdp:Email>j.odijk@uu.nl</cmdp:Email>
<cmdp:Department>UiL-OTS</cmdp:Department>
<cmdp:Organisation>Utrecht University</cmdp:Organisation>
</cmdp:Contact>
<cmdp:Duration>
<cmdp:StartYear>2015</cmdp:StartYear>
<cmdp:CompletionYear>2018</cmdp:CompletionYear>
</cmdp:Duration>
</cmdp:Project>
</cmdp:NationalProjects>
<cmdp:Country>
<cmdp:CountryName>Netherlands</cmdp:CountryName>
<cmdp:CountryCoding>NL</cmdp:CountryCoding>
</cmdp:Country>
<cmdp:Description>
<cmdp:Description>The OpenConvert tools convert to TEI or FoLiA from a number of input formats (alto, text, word, HTML, ePub). The tools are available as a Java command line tool, a web service and a web application. The OpenConvert tools were created by IVDNT in the OpenConvert project. Furthermore, as a proof of concept, the website currently provides two annotation tools: a simple tokenizer for TEI files and a modern Dutch part of speech tagger.</cmdp:Description>
</cmdp:Description>
</cmdp:GeneralInfo>
<cmdp:SoftwareFunction>
<cmdp:toolCategory>conversion tool</cmdp:toolCategory>
<cmdp:toolCategory>annotation tool</cmdp:toolCategory>
<cmdp:toolCategory>written language tool</cmdp:toolCategory>
<cmdp:ToolTasks>
<cmdp:toolTask>corpus processing</cmdp:toolTask>
<cmdp:toolTask>format conversion</cmdp:toolTask>
<cmdp:toolTask>text conversion</cmdp:toolTask>
<cmdp:toolTask>tokenisation</cmdp:toolTask>
<cmdp:toolTask>part of speech tagging</cmdp:toolTask>
</cmdp:ToolTasks>
<cmdp:ResearchPhases>
<cmdp:ResearchPhase>Enriching Data</cmdp:ResearchPhase>
</cmdp:ResearchPhases>
<cmdp:ResearchDomains>
<cmdp:researchDomain>Linguistics</cmdp:researchDomain>
<cmdp:researchDomain>Religion Studies</cmdp:researchDomain>
<cmdp:researchDomain>Communication and Media Studies</cmdp:researchDomain>
<cmdp:researchDomain>Cultural Sciences</cmdp:researchDomain>
<cmdp:researchDomain>History</cmdp:researchDomain>
<cmdp:researchDomain>Literary Studies</cmdp:researchDomain>
<cmdp:researchDomain>Philosophy</cmdp:researchDomain>
<cmdp:researchDomain>Political Studies</cmdp:researchDomain>
</cmdp:ResearchDomains>
<cmdp:LanguageVariety>
<cmdp:languageDependent>no</cmdp:languageDependent>
<cmdp:Centuries>
<cmdp:centuryDependent>no</cmdp:centuryDependent>
</cmdp:Centuries>
</cmdp:LanguageVariety>
</cmdp:SoftwareFunction>
<cmdp:SoftwareImplementation>
<cmdp:distributionMedium>Download</cmdp:distributionMedium>
<cmdp:distributionMedium>Online available</cmdp:distributionMedium>
<cmdp:sourcecodeURI>https://github.com/INL/OpenConvert</cmdp:sourcecodeURI>
<cmdp:UserInterface>
<cmdp:interfaceType>command line interface</cmdp:interfaceType>
<cmdp:applicationType>local desktop</cmdp:applicationType>
</cmdp:UserInterface>
<cmdp:UserInterface>
<cmdp:interfaceType>graphical user interface</cmdp:interfaceType>
<cmdp:applicationType>web application</cmdp:applicationType>
</cmdp:UserInterface>
<cmdp:UserInterface>
<cmdp:interfaceType>other</cmdp:interfaceType>
<cmdp:applicationType>web service</cmdp:applicationType>
<cmdp:Description>
<cmdp:Description>The tool service can be called as a REST webservice which returns responses in XML, allowing it to be part of a webservice tool chain.</cmdp:Description>
</cmdp:Description>
</cmdp:UserInterface>
<cmdp:Input>
<cmdp:inputType>text</cmdp:inputType>
<cmdp:inputResource>input text</cmdp:inputResource>
<cmdp:MimeType>
<cmdp:MimeType>text/plain</cmdp:MimeType>
</cmdp:MimeType>
<cmdp:MimeType>
<cmdp:MimeType>application/msword</cmdp:MimeType>
</cmdp:MimeType>
<cmdp:MimeType>
<cmdp:MimeType>text/html</cmdp:MimeType>
</cmdp:MimeType>
<cmdp:Description>
<cmdp:Description>Input TEI, plain text, Word, HTML</cmdp:Description>
</cmdp:Description>
</cmdp:Input>
<cmdp:Input>
<cmdp:inputType>text</cmdp:inputType>
<cmdp:inputResource>input text</cmdp:inputResource>
<cmdp:Schema>
<cmdp:schemaname>ALTO</cmdp:schemaname>
</cmdp:Schema>
<cmdp:MimeType>
<cmdp:MimeType>text/xml</cmdp:MimeType>
</cmdp:MimeType>
<cmdp:Description>
<cmdp:Description>ALTO XML input</cmdp:Description>
</cmdp:Description>
</cmdp:Input>
<cmdp:Input>
<cmdp:inputType>text</cmdp:inputType>
<cmdp:inputResource>input text</cmdp:inputResource>
<cmdp:MimeType>
<cmdp:MimeType>application/epub+zip</cmdp:MimeType>
</cmdp:MimeType>
<cmdp:Description>
<cmdp:Description>ePub input</cmdp:Description>
</cmdp:Description>
</cmdp:Input>
<cmdp:Input>
<cmdp:inputType>directory</cmdp:inputType>
<cmdp:inputResource>input text</cmdp:inputResource>
<!-- <MimeType>
<MimeType>no mimetype for folders or directories!</MimeType>
</MimeType> -->
<cmdp:Description>
<cmdp:Description>directory containing files of a valid input type</cmdp:Description>
</cmdp:Description>
</cmdp:Input>
<cmdp:Input>
<cmdp:inputType>zipped text</cmdp:inputType>
<cmdp:inputResource>input text</cmdp:inputResource>
<cmdp:MimeType>
<cmdp:MimeType>application/zip</cmdp:MimeType>
</cmdp:MimeType>
<cmdp:Description>
<cmdp:Description>zip file (with extension .zip) containing files of a valid input type</cmdp:Description>
</cmdp:Description>
</cmdp:Input>
<cmdp:Output>
<cmdp:outputType>text</cmdp:outputType>
<cmdp:characterEncoding>UTF8</cmdp:characterEncoding>
<cmdp:outputResource>conversion result</cmdp:outputResource>
<cmdp:Schema>
<cmdp:schemaname>FoLiA</cmdp:schemaname>
<cmdp:schemaURL>https://github.com/proycon/folia/blob/master/schemas/folia.rng</cmdp:schemaURL>
</cmdp:Schema>
<cmdp:MimeType>
<cmdp:MimeType>text/xml</cmdp:MimeType>
</cmdp:MimeType>
</cmdp:Output>
<cmdp:Output>
<cmdp:outputType>text</cmdp:outputType>
<cmdp:characterEncoding>UTF8</cmdp:characterEncoding>
<cmdp:outputResource>conversion result</cmdp:outputResource>
<cmdp:Schema>
<cmdp:schemaname>TEI</cmdp:schemaname>
</cmdp:Schema>
<cmdp:MimeType>
<cmdp:MimeType>text/xml</cmdp:MimeType>
</cmdp:MimeType>
</cmdp:Output>
</cmdp:SoftwareImplementation>
<cmdp:Access>
<cmdp:ResourceLicense>
<cmdp:license>other</cmdp:license>
<cmdp:distributionType>public</cmdp:distributionType>
<cmdp:url>https://github.com/INL/OpenConvert</cmdp:url>
<cmdp:Description>
<cmdp:Description>Free for academic use. Not applicable to commercial parties.</cmdp:Description>
<cmdp:Description>CLARIN-based login required. The CLARIN federation accepts logins from many European institutions. Please see http://www.clarin.eu/content/service-provider-federation for more details.</cmdp:Description>
</cmdp:Description>
<cmdp:Price>
<cmdp:amount>0</cmdp:amount>
<cmdp:ISO4217>
<cmdp:iso-4217-currency>EUR</cmdp:iso-4217-currency>
</cmdp:ISO4217>
</cmdp:Price>
</cmdp:ResourceLicense>
<cmdp:Contact>
<cmdp:Email>servicedesk@ivdnt.org</cmdp:Email>
<cmdp:Organisation xml:lang="nld">Instituut voor de Nederlandse Taal</cmdp:Organisation>
<cmdp:Organisation xml:lang="eng">Institute for the Dutch Language</cmdp:Organisation>
<cmdp:Url>http://www.ivdnt.org/</cmdp:Url>
</cmdp:Contact>
</cmdp:Access>
<cmdp:ResourceDocumentation>
<cmdp:Documentation>
<cmdp:title>OpenConvert help</cmdp:title>
<cmdp:documentationTarget>user</cmdp:documentationTarget>
<cmdp:url>http://openconvert.clarin.inl.nl/openconvert/web/help.html</cmdp:url>
<cmdp:ISO639>
<cmdp:iso-639-3-code>eng</cmdp:iso-639-3-code>
</cmdp:ISO639>
</cmdp:Documentation>
<cmdp:Documentation>
<cmdp:title>OpenConvert help</cmdp:title>
<cmdp:documentationTarget>technical</cmdp:documentationTarget>
<cmdp:url>http://openconvert.clarin.inl.nl/openconvert/web/help.html</cmdp:url>
<cmdp:ISO639>
<cmdp:iso-639-3-code>eng</cmdp:iso-639-3-code>
</cmdp:ISO639>
</cmdp:Documentation>
<cmdp:Pictures>
<cmdp:picture type="other">
http://dev.clarin.nl/sites/default/files/picture.jpg
</cmdp:picture>
</cmdp:Pictures>
</cmdp:ResourceDocumentation>
<cmdp:SoftwareDevelopment>
<cmdp:Project>
<cmdp:name>OpenConvert</cmdp:name>
<cmdp:title>OpenConvert</cmdp:title>
<cmdp:funder>http://clarin.nl</cmdp:funder>
<cmdp:url>http://portal.clarin.nl/node/4224</cmdp:url>
<cmdp:Contact>
<cmdp:Person>Jan Theo Bakker</cmdp:Person>
<cmdp:Email>jantheo.bakker@ivdnt.org</cmdp:Email>
<cmdp:Organisation xml:lang="nld">Instituut voor de Nederlandse Taal</cmdp:Organisation>
<cmdp:Organisation xml:lang="eng">Institute for the Dutch Language</cmdp:Organisation>
<cmdp:Url>http://www.ivdnt.org/over-ons/contact/medewerkers</cmdp:Url>
</cmdp:Contact>
<cmdp:Duration/>
</cmdp:Project>
<cmdp:Creator>
<cmdp:Role>Developer</cmdp:Role>
<cmdp:Contact>
<cmdp:Person>Jan Theo Bakker</cmdp:Person>
<cmdp:Email>jantheo.bakker@ivdnt.org</cmdp:Email>
<cmdp:Organisation xml:lang="nld">Instituut voor de Nederlandse Taal</cmdp:Organisation>
<cmdp:Organisation xml:lang="eng">Institute for the Dutch Language</cmdp:Organisation>
<cmdp:Url>http://www.ivdnt.org/over-ons/contact/medewerkers</cmdp:Url>
</cmdp:Contact>
</cmdp:Creator>
</cmdp:SoftwareDevelopment>
<cmdp:TechnicalInfo>
<cmdp:ImplementationLanguage>
<cmdp:implementationLanguage>Java</cmdp:implementationLanguage>
<cmdp:version>unknown</cmdp:version>
</cmdp:ImplementationLanguage>
</cmdp:TechnicalInfo>
<cmdp:Service><!-- 0-unbounded -->
<cmdp:Name><!-- 1-1 -->
</cmdp:Name>
<cmdp:Description><!-- 0-unbounded -->
</cmdp:Description>
<cmdp:ServiceDescriptionLocation/>
<!-- 1-1 -->
<cmdp:Operations><!-- 1-1 -->
<cmdp:Operation><!-- 1-unbounded -->
<cmdp:Name>Format Conversion, tokenisation, part of speech tagging (the latter for Dutch)<!-- 1-1 -->
</cmdp:Name>
<cmdp:Description><!-- 0-unbounded -->
</cmdp:Description>
<cmdp:Input><!-- 0-1 -->
<cmdp:Parameter><!-- 0-unbounded -->
<cmdp:Name>input<!-- 1-1 --></cmdp:Name>
<cmdp:Description>input file name (File upload)<!-- 0-unbounded --></cmdp:Description>
<cmdp:DataType>xsd:string</cmdp:DataType>
<cmdp:isConfigurationParameter><!-- 0-1 -->false</cmdp:isConfigurationParameter>
<cmdp:DataCategory>http://hdl.handle.net/11459/CCR_C-3825_36820064-e2e2-a526-9eea-827cff915dbb</cmdp:DataCategory>
<cmdp:TechnicalMetadata><!-- 1-1 --></cmdp:TechnicalMetadata>
</cmdp:Parameter>
<cmdp:Parameter><!-- 0-unbounded -->
<cmdp:Name>format<!-- 1-1 --></cmdp:Name>
<cmdp:Description>Format of input file<!-- 0-unbounded --></cmdp:Description>
<cmdp:DataType>xsd:string</cmdp:DataType>
<cmdp:isConfigurationParameter><!-- 0-1 -->true</cmdp:isConfigurationParameter>
<cmdp:DataCategory><!-- 0-1 --></cmdp:DataCategory>
<cmdp:SemanticType><!-- 0-1 --></cmdp:SemanticType>
<cmdp:Values><!-- 0-1 -->
<cmdp:ParameterValue><!-- 1-unbounded -->
<cmdp:Value><!-- 1-1 -->tei</cmdp:Value>
<cmdp:Description><!-- 0-1 -->input file mimetype is application/tei+xml</cmdp:Description>
<cmdp:DataCategory><!-- 0-1 --></cmdp:DataCategory>
</cmdp:ParameterValue>
<cmdp:ParameterValue><!-- 1-unbounded -->
<cmdp:Value><!-- 1-1 -->html</cmdp:Value>
<cmdp:Description><!-- 0-1 -->input file mimetype is text/html</cmdp:Description>
<cmdp:DataCategory><!-- 0-1 --></cmdp:DataCategory>
</cmdp:ParameterValue>
<cmdp:ParameterValue><!-- 1-unbounded -->
<cmdp:Value><!-- 1-1 -->alto</cmdp:Value>
<cmdp:Description><!-- 0-1 -->input file mimetype is text/alto+xml</cmdp:Description>
<cmdp:DataCategory><!-- 0-1 --></cmdp:DataCategory>
</cmdp:ParameterValue>
<cmdp:ParameterValue><!-- 1-unbounded -->
<cmdp:Value><!-- 1-1 -->word</cmdp:Value>
<cmdp:Description><!-- 0-1 -->input file mimetype is application/msword</cmdp:Description>
<cmdp:DataCategory><!-- 0-1 --></cmdp:DataCategory>
</cmdp:ParameterValue>
<cmdp:ParameterValue><!-- 1-unbounded -->
<cmdp:Value><!-- 1-1 -->docx</cmdp:Value>
<cmdp:Description><!-- 0-1 -->input file mimetype is application/vnd.openxmlformats-officedocument.wordprocessingml.document</cmdp:Description>
<cmdp:DataCategory><!-- 0-1 --></cmdp:DataCategory>
</cmdp:ParameterValue>
<cmdp:ParameterValue><!-- 1-unbounded -->
<cmdp:Value><!-- 1-1 -->epub</cmdp:Value>
<cmdp:Description><!-- 0-1 -->input file mimetype is application/epub+zip</cmdp:Description>
<cmdp:DataCategory><!-- 0-1 --></cmdp:DataCategory>
</cmdp:ParameterValue>
<cmdp:ParameterValue><!-- 1-unbounded -->
<cmdp:Value><!-- 1-1 -->text</cmdp:Value>
<cmdp:Description><!-- 0-1 -->input file mimetype is text/plain</cmdp:Description>
<cmdp:DataCategory><!-- 0-1 --></cmdp:DataCategory>
</cmdp:ParameterValue>
</cmdp:Values>
<cmdp:TechnicalMetadata><!-- 1-1 --></cmdp:TechnicalMetadata>
</cmdp:Parameter>
<cmdp:Parameter><!-- 0-unbounded -->
<cmdp:Name>to<!-- 1-1 --></cmdp:Name>
<cmdp:Description>Format of output file<!-- 0-unbounded --></cmdp:Description>
<cmdp:DataType>xsd:string</cmdp:DataType>
<cmdp:isConfigurationParameter><!-- 0-1 -->true</cmdp:isConfigurationParameter>
<cmdp:DataCategory><!-- 0-1 --></cmdp:DataCategory>
<cmdp:SemanticType><!-- 0-1 --></cmdp:SemanticType>
<cmdp:Values><!-- 0-1 -->
<cmdp:ParameterValue><!-- 1-unbounded -->
<cmdp:Value><!-- 1-1 -->tei</cmdp:Value>
<cmdp:Description><!-- 0-1 -->output file mimetype is application/tei+xml</cmdp:Description>
<cmdp:DataCategory><!-- 0-1 --></cmdp:DataCategory>
</cmdp:ParameterValue>
<cmdp:ParameterValue><!-- 1-unbounded -->
<cmdp:Value><!-- 1-1 -->folia</cmdp:Value>
<cmdp:Description><!-- 0-1 -->output file mimetype is text/folia+xml</cmdp:Description>
<cmdp:DataCategory><!-- 0-1 --></cmdp:DataCategory>
</cmdp:ParameterValue>
</cmdp:Values>
<cmdp:TechnicalMetadata><!-- 1-1 --></cmdp:TechnicalMetadata>
</cmdp:Parameter>
<cmdp:Parameter><!-- 0-unbounded -->
<cmdp:Name>tagger<!-- 1-1 --></cmdp:Name>
<cmdp:Description>to specify the tagger or tokeniser<!-- 0-unbounded --></cmdp:Description>
<cmdp:DataType>xsd:string</cmdp:DataType>
<cmdp:isConfigurationParameter><!-- 0-1 -->true</cmdp:isConfigurationParameter>
<cmdp:DataCategory><!-- 0-1 --></cmdp:DataCategory>
<cmdp:SemanticType><!-- 0-1 --></cmdp:SemanticType>
<cmdp:Values><!-- 0-1 -->
<cmdp:ParameterValue><!-- 1-unbounded -->
<cmdp:Value><!-- 1-1 -->chn-tagger</cmdp:Value>
<cmdp:Description><!-- 0-1 -->Basic tagger-lemmatizer for modern Dutch</cmdp:Description>
<cmdp:DataCategory><!-- 0-1 --></cmdp:DataCategory>
</cmdp:ParameterValue>
<cmdp:ParameterValue><!-- 1-unbounded -->
<cmdp:Value><!-- 1-1 -->tokenizer</cmdp:Value>
<cmdp:Description><!-- 0-1 -->a TEI tokenizer</cmdp:Description>
<cmdp:DataCategory><!-- 0-1 --></cmdp:DataCategory>
</cmdp:ParameterValue>
</cmdp:Values>
<cmdp:TechnicalMetadata><!-- 1-1 --></cmdp:TechnicalMetadata>
</cmdp:Parameter>
</cmdp:Input>
<cmdp:Output><!-- 1-1 --></cmdp:Output>
</cmdp:Operation>
</cmdp:Operations>
</cmdp:Service>
</cmdp:ClarinSoftwareDescription>
</cmd:Components>
</cmd:CMD>
Organisation:
- Institute for the Dutch Language
- Utrecht University
- Instituut voor de Nederlandse Taal