Frog: An advanced Natural Language Processing suite for Dutch

<?xml version="1.0" encoding="UTF-8"?>
<cmd:CMD xmlns:cmd="http://www.clarin.eu/cmd/1"
         xmlns:cmdp="http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1342181139640"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         CMDVersion="1.2"
         xsi:schemaLocation="http://www.clarin.eu/cmd/1 https://infra.clarin.eu/CMDI/1.x/xsd/cmd-envelop.xsd http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1342181139640 https://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/1.1/profiles/clarin.eu:cr1:p_1342181139640/1.2/xsd">
   <!-- Record header: who created this metadata record and when, the profile it follows,
        and the display name of the collection it belongs to. -->
   <cmd:Header>
      <cmd:MdCreator>bobboelhouwer</cmd:MdCreator>
      <cmd:MdCreationDate>2016-09-05+02:00</cmd:MdCreationDate>
      <cmd:MdProfile>clarin.eu:cr1:p_1342181139640</cmd:MdProfile>
      <cmd:MdCollectionDisplayName>CLARIN Netherlands</cmd:MdCollectionDisplayName>
   </cmd:Header>
   <!-- Resource proxies: FROG001 is the landing page, FROG002 is the resource link.
        The journal-file and relation lists are present but empty. -->
   <cmd:Resources>
      <cmd:ResourceProxyList>
         <cmd:ResourceProxy id="FROG001">
            <cmd:ResourceType>LandingPage</cmd:ResourceType>
            <cmd:ResourceRef>http://tst-centrale.org/nl/tst-materialen/tools/frog-detail</cmd:ResourceRef>
         </cmd:ResourceProxy>
         <cmd:ResourceProxy id="FROG002">
            <cmd:ResourceType>Resource</cmd:ResourceType>
            <cmd:ResourceRef>https://webservices-lst.science.ru.nl/frog/</cmd:ResourceRef>
         </cmd:ResourceProxy>
      </cmd:ResourceProxyList>
      <cmd:JournalFileProxyList/>
      <cmd:ResourceRelationList/>
   </cmd:Resources>
   <cmd:Components>
      <cmdp:ClarinSoftwareDescription>
         <!-- General description of the tool: naming, release status, funding projects,
              country and a free-text summary. -->
         <cmdp:GeneralInfo>
            <cmdp:name xml:lang="eng">Frog</cmdp:name>
            <cmdp:title xml:lang="eng">Frog: An advanced Natural Language Processing suite for Dutch</cmdp:title>
            <cmdp:publicationYear>2014</cmdp:publicationYear>
            <cmdp:url>http://tst-centrale.org/nl/tst-materialen/tools/frog-detail</cmdp:url>
            <cmdp:CLARINCentre>Dutch Language Institute</cmdp:CLARINCentre>
            <cmdp:ReleaseStatus>
               <cmdp:LifeCycleStatus>published</cmdp:LifeCycleStatus>
               <cmdp:lastUpdate>2016-07-11</cmdp:lastUpdate>
            </cmdp:ReleaseStatus>
            <!-- Two national projects are listed; both carry the same contact details. -->
            <cmdp:NationalProjects>
               <cmdp:Project>
                  <cmdp:name>CLARIN-NL</cmdp:name>
                  <cmdp:title>CLARIN in the Netherlands</cmdp:title>
                  <cmdp:id>184.021.003</cmdp:id>
                  <cmdp:funder>NWO</cmdp:funder>
                  <cmdp:url>http://www.clarin.nl</cmdp:url>
                  <cmdp:Contact>
                     <cmdp:Person>Jan Odijk</cmdp:Person>
                     <cmdp:Role>National Coordinator</cmdp:Role>
                     <cmdp:Address>Utrecht, the Netherlands</cmdp:Address>
                     <cmdp:Email>j.odijk@uu.nl</cmdp:Email>
                     <cmdp:Department>UiL-OTS</cmdp:Department>
                     <cmdp:Organisation>Utrecht University</cmdp:Organisation>
                  </cmdp:Contact>
                  <cmdp:Duration>
                     <cmdp:StartYear>2009</cmdp:StartYear>
                     <cmdp:CompletionYear>2015</cmdp:CompletionYear>
                  </cmdp:Duration>
               </cmdp:Project>
               <cmdp:Project>
                  <cmdp:name>CLARIAH-CORE</cmdp:name>
                  <cmdp:title>Common Lab Research Infrastructure for the Arts and the Humanities</cmdp:title>
                  <cmdp:id>184.033.101</cmdp:id>
                  <cmdp:funder>NWO</cmdp:funder>
                  <cmdp:url>http://www.clariah.nl</cmdp:url>
                  <cmdp:Contact>
                     <cmdp:Person>Jan Odijk</cmdp:Person>
                     <cmdp:Role>National Coordinator</cmdp:Role>
                     <cmdp:Address>Utrecht, the Netherlands</cmdp:Address>
                     <cmdp:Email>j.odijk@uu.nl</cmdp:Email>
                     <cmdp:Department>UiL-OTS</cmdp:Department>
                     <cmdp:Organisation>Utrecht University</cmdp:Organisation>
                  </cmdp:Contact>
                  <cmdp:Duration>
                     <cmdp:StartYear>2015</cmdp:StartYear>
                     <cmdp:CompletionYear>2018</cmdp:CompletionYear>
                  </cmdp:Duration>
               </cmdp:Project>
            </cmdp:NationalProjects>
            <cmdp:Country>
               <cmdp:CountryName>Netherlands</cmdp:CountryName>
               <cmdp:CountryCoding>NL</cmdp:CountryCoding>
            </cmdp:Country>
            <!-- The description text is kept on one line so that no indentation ends up inside the value. -->
            <cmdp:Description>
               <cmdp:Description xml:lang="eng">Frog's current version will tokenize, tag, lemmatize, and morphologically segment word tokens in Dutch text files, will assign a dependency graph to each sentence, will identify the base phrase chunks in the sentence, and will attempt to find and label all named entities.</cmdp:Description>
            </cmdp:Description>
         </cmdp:GeneralInfo>
         <!-- What the tool does: categories, the seven processing tasks, research context,
              and the language variety it targets (Dutch, centuries 20 through 21). -->
         <cmdp:SoftwareFunction>
            <cmdp:toolCategory>written language tool</cmdp:toolCategory>
            <cmdp:toolCategory>mono-lingual tool</cmdp:toolCategory>
            <cmdp:ToolTasks>
               <cmdp:toolTask>dependency parsing</cmdp:toolTask>
               <cmdp:toolTask>lemmatisation</cmdp:toolTask>
               <cmdp:toolTask>morphological analysis</cmdp:toolTask>
               <cmdp:toolTask>named entity recognition</cmdp:toolTask>
               <cmdp:toolTask>part of speech tagging</cmdp:toolTask>
               <cmdp:toolTask>sentence splitting</cmdp:toolTask>
               <cmdp:toolTask>tokenisation</cmdp:toolTask>
            </cmdp:ToolTasks>
            <cmdp:ResearchPhases>
               <cmdp:ResearchPhase>Enriching Data</cmdp:ResearchPhase>
            </cmdp:ResearchPhases>
            <cmdp:ResearchDomains>
               <cmdp:researchDomain>Linguistics</cmdp:researchDomain>
            </cmdp:ResearchDomains>
            <!-- Each linguistics subject carries an empty description placeholder. -->
            <cmdp:LinguisticsSubject>
               <cmdp:linguisticsSubject>general linguistics</cmdp:linguisticsSubject>
               <cmdp:Description>
                  <cmdp:Description/>
               </cmdp:Description>
            </cmdp:LinguisticsSubject>
            <cmdp:LinguisticsSubject>
               <cmdp:linguisticsSubject>syntax</cmdp:linguisticsSubject>
               <cmdp:Description>
                  <cmdp:Description/>
               </cmdp:Description>
            </cmdp:LinguisticsSubject>
            <cmdp:LanguageVariety>
               <cmdp:languageDependent>yes</cmdp:languageDependent>
               <cmdp:Language>
                  <cmdp:LanguageName>Dutch</cmdp:LanguageName>
                  <cmdp:ISO639>
                     <cmdp:iso-639-3-code>nld</cmdp:iso-639-3-code>
                  </cmdp:ISO639>
               </cmdp:Language>
               <cmdp:Centuries>
                  <cmdp:centuryDependent>yes</cmdp:centuryDependent>
                  <cmdp:CenturyInterval>
                     <cmdp:centuryFrom>20</cmdp:centuryFrom>
                     <cmdp:centuryThrough>21</cmdp:centuryThrough>
                  </cmdp:CenturyInterval>
               </cmdp:Centuries>
            </cmdp:LanguageVariety>
         </cmdp:SoftwareFunction>
         <!-- How the tool is delivered and used: distribution medium, user interface,
              and the declared input and output types. -->
         <cmdp:SoftwareImplementation>
            <cmdp:distributionMedium>Download</cmdp:distributionMedium>
            <cmdp:UserInterface>
               <cmdp:interfaceType>command line interface</cmdp:interfaceType>
               <cmdp:applicationType>local desktop</cmdp:applicationType>
            </cmdp:UserInterface>
            <cmdp:Input>
               <cmdp:inputType>text</cmdp:inputType>
            </cmdp:Input>
            <cmdp:Output>
               <cmdp:outputType>text</cmdp:outputType>
               <!-- Output is declared as plain text and XML. -->
               <cmdp:MimeType>
                  <cmdp:MimeType>text/plain</cmdp:MimeType>
                  <cmdp:MimeType>text/xml</cmdp:MimeType>
               </cmdp:MimeType>
            </cmdp:Output>
         </cmdp:SoftwareImplementation>
         <!-- Access conditions: licence details (licence name recorded as "unknown", licence URL
              left empty, price 0 EUR) and the contact organisation, named in Dutch and English. -->
         <cmdp:Access>
            <cmdp:ResourceLicense>
               <cmdp:license>unknown</cmdp:license>
               <cmdp:distributionType>public</cmdp:distributionType>
               <cmdp:url/>
               <cmdp:Price>
                  <cmdp:amount>0</cmdp:amount>
                  <cmdp:ISO4217>
                     <cmdp:iso-4217-currency>EUR</cmdp:iso-4217-currency>
                  </cmdp:ISO4217>
               </cmdp:Price>
            </cmdp:ResourceLicense>
            <cmdp:Contact>
               <cmdp:Email>servicedesk@ivdnt.org</cmdp:Email>
               <cmdp:Organisation xml:lang="nld">Instituut voor de Nederlandse Taal</cmdp:Organisation>
               <cmdp:Organisation xml:lang="eng">Institute for the Dutch Language</cmdp:Organisation>
               <cmdp:Url>http://www.ivdnt.org/</cmdp:Url>
            </cmdp:Contact>
         </cmdp:Access>
         <!-- Documentation: two English user documents (PDF manual and project website)
              and one peer-reviewed background publication. -->
         <cmdp:ResourceDocumentation>
            <cmdp:Documentation>
               <cmdp:title>Frog: A Natural Language Processing Suite for Dutch</cmdp:title>
               <cmdp:documentationTarget>user</cmdp:documentationTarget>
               <cmdp:url>https://github.com/LanguageMachines/frog/raw/master/docs/frogmanual.pdf</cmdp:url>
               <cmdp:ISO639>
                  <cmdp:iso-639-3-code>eng</cmdp:iso-639-3-code>
               </cmdp:ISO639>
            </cmdp:Documentation>
            <cmdp:Documentation>
               <cmdp:title>Frog: An advanced Natural Language Processing suite for Dutch</cmdp:title>
               <cmdp:documentationTarget>user</cmdp:documentationTarget>
               <cmdp:url>https://languagemachines.github.io/frog/</cmdp:url>
               <cmdp:ISO639>
                  <cmdp:iso-639-3-code>eng</cmdp:iso-639-3-code>
               </cmdp:ISO639>
            </cmdp:Documentation>
            <cmdp:Publication>
               <cmdp:publicationCategory>in proceedings</cmdp:publicationCategory>
               <cmdp:publicationPurpose>scientific background</cmdp:publicationPurpose>
               <cmdp:peerReviewStatus>yes</cmdp:peerReviewStatus>
               <cmdp:Description>
                  <!-- NOTE(review): this element marks its language with LanguageID, whereas the
                       description under GeneralInfo uses xml:lang; confirm which attribute the
                       profile expects here. -->
                  <!-- The closing tag stays on the same line as the text so that no trailing
                       newline or indentation becomes part of the citation. -->
                  <cmdp:Description LanguageID="eng">Van den Bosch, A., Busser, G.J., Daelemans, W., and Canisius, S. (2007). An efficient memory-based morphosyntactic tagger and parser for Dutch. In F. van Eynde, P. Dirix, I. Schuurman, and V. Vandeghinste (Eds.), Selected Papers of the 17th Computational Linguistics in the Netherlands Meeting, Leuven, Belgium, pp. 99-114</cmdp:Description>
               </cmdp:Description>
            </cmdp:Publication>
         </cmdp:ResourceDocumentation>
         <!-- Development context: the projects in which the tool was developed and its creator.
              Empty elements are placeholders for values that have not been recorded. -->
         <cmdp:SoftwareDevelopment>
            <!-- CGN is listed once; an identical second copy of this entry was removed. -->
            <cmdp:Project>
               <cmdp:name>CGN</cmdp:name>
               <cmdp:title>Corpus Gesproken Nederlands</cmdp:title>
               <cmdp:funder>NWO</cmdp:funder>
               <cmdp:url/>
               <cmdp:Contact>
                  <cmdp:Person/>
                  <cmdp:Email/>
                  <cmdp:Organisation xml:lang="eng"/>
               </cmdp:Contact>
               <cmdp:Duration/>
            </cmdp:Project>
            <!-- Title and URL match the CLARIN-NL entry under GeneralInfo/NationalProjects. -->
            <cmdp:Project>
               <cmdp:name>CLARIN-NL</cmdp:name>
               <cmdp:title>CLARIN in the Netherlands</cmdp:title>
               <cmdp:funder>NWO</cmdp:funder>
               <cmdp:url>http://www.clarin.nl</cmdp:url>
               <cmdp:Contact>
                  <cmdp:Person/>
                  <cmdp:Email/>
                  <cmdp:Organisation xml:lang="eng"/>
               </cmdp:Contact>
               <cmdp:Duration/>
            </cmdp:Project>
            <!-- NOTE(review): GeneralInfo/NationalProjects names this project CLARIAH-CORE and gives it
                 a title and URL; confirm whether this entry refers to the same project before
                 copying those values here. -->
            <cmdp:Project>
               <cmdp:name>CLARIAH</cmdp:name>
               <cmdp:title/>
               <cmdp:funder>NWO</cmdp:funder>
               <cmdp:url/>
               <cmdp:Contact>
                  <cmdp:Person/>
                  <cmdp:Email/>
                  <cmdp:Organisation xml:lang="eng"/>
               </cmdp:Contact>
               <cmdp:Duration/>
            </cmdp:Project>
            <cmdp:Creator>
               <cmdp:Contact>
                  <cmdp:Person>Antal van den Bosch</cmdp:Person>
                  <cmdp:Email/>
                  <cmdp:Organisation xml:lang="eng"/>
               </cmdp:Contact>
            </cmdp:Creator>
         </cmdp:SoftwareDevelopment>
         <!-- Technical details: both the implementation language and its version are recorded
              with the literal value "unknown". -->
         <cmdp:TechnicalInfo>
            <cmdp:ImplementationLanguage>
               <cmdp:implementationLanguage>unknown</cmdp:implementationLanguage>
               <cmdp:version>unknown</cmdp:version>
            </cmdp:ImplementationLanguage>
         </cmdp:TechnicalInfo>
         <!-- Processing instruction with target "ignore": an XML parser treats everything up to
              the closing "?>" as opaque PI data, so the LRSSection markup below is not part of
              the element tree. It describes two LRS entries (plain text input and FoLiA XML input).
              NOTE(review): the first LRS entry wraps its task list in Tasks while the second uses
              ToolTasks; confirm which name is intended if this section is ever turned into real
              elements. -->
         <?ignore <LRSSection><!-- 0-1 -->
	<LRS><!-- 1- unbounded -->
		<Description><!-- 0-1 --><Description>Frog (plain text input)</Description></Description>
		<Tasks><!-- 1-1 -->
			<toolTask>dependency parsing</toolTask>
			<toolTask>lemmatisation</toolTask>
			<toolTask>morphological analysis</toolTask>
			<toolTask>named entity recognition</toolTask>
			<toolTask>part of speech tagging</toolTask>
			<toolTask>sentence splitting</toolTask>
			<toolTask>tokenisation</toolTask>
		</Tasks>
		<Input><MimeTypes><MimeType>text/plain</MimeType></MimeTypes></Input>
		<Output><MimeTypes><MimeType>text/csv</MimeType></MimeTypes>
                <Schema>Tadpole Columned Output Format</Schema> 
		</Output>
		<Output><MimeTypes><MimeType>text/xml+folia</MimeType></MimeTypes>
                <Schema>FoLiA</Schema> 
		</Output>
		<ActualParameters><!--0-1 -->
			<ActualParameter><!--1 - unbounded -->
				<ActualParameterName>project</ActualParameterName>
				<ActualParameterValue>new</ActualParameterValue>
			</ActualParameter>
			<ActualParameter><!--1 - unbounded -->
				<ActualParameterName>input</ActualParameterName>
				<ActualParameterValue>self.linkToResource</ActualParameterValue>
			</ActualParameter>
		</ActualParameters>
		<LRSMapping><!-- 0-unbounded -->
		   <LRSParameterName>input</LRSParameterName>
		   <ActualParameterName>maininput_url</ActualParameterName>
		</LRSMapping>
	</LRS>
	<LRS><!-- 1- unbounded -->
		<Description><!-- 0-1 --><Description>Frog (folia+xml input)</Description></Description>
		<ToolTasks><!-- 1-1 -->
			<toolTask>dependency parsing</toolTask>
			<toolTask>lemmatisation</toolTask>
			<toolTask>morphological analysis</toolTask>
			<toolTask>named entity recognition</toolTask>
			<toolTask>part of speech tagging</toolTask>
			<toolTask>sentence splitting</toolTask>
			<toolTask>tokenisation</toolTask>
		</ToolTasks>
		<Input><MimeTypes><MimeType>text/xml+folia</MimeType></MimeTypes></Input>
		<Output><MimeTypes><MimeType>text/csv</MimeType></MimeTypes>
                <Schema>Tadpole Columned Output Format</Schema> 
		</Output>
		<Output><MimeTypes><MimeType>text/xml+folia</MimeType></MimeTypes>
                <Schema>FoLiA</Schema> 
		</Output>
		<ActualParameters><!--0-1 -->
			<ActualParameter><!--1 - unbounded -->
				<ActualParameterName>project</ActualParameterName>
				<ActualParameterValue>new</ActualParameterValue>
			</ActualParameter>
			<ActualParameter><!--1 - unbounded -->
				<ActualParameterName>input</ActualParameterName>
				<ActualParameterValue>self.linkToResource</ActualParameterValue>
			</ActualParameter>
		</ActualParameters>
		<LRSMapping><!-- 0-unbounded -->
		   <LRSParameterName>input</LRSParameterName>
		   <ActualParameterName>foliainput_url</ActualParameterName>
		</LRSMapping>
	</LRS>
</LRSSection>
?>
      </cmdp:ClarinSoftwareDescription>
   </cmd:Components>
</cmd:CMD>
Organisation:
  • Institute for the Dutch Language
  • Utrecht University
  • Instituut voor de Nederlandse Taal

Resources:

Resource

text/plain