The CLASSLA-StanfordNLP model for UD dependency parsing of standard Bulgarian 1.0

<?xml version="1.0" encoding="UTF-8"?>
<cmd:CMD xmlns:cmd="http://www.clarin.eu/cmd/1"
         xmlns:cmdp="http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1403526079380"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         CMDVersion="1.2"
         xsi:schemaLocation="http://www.clarin.eu/cmd/1 https://infra.clarin.eu/CMDI/1.x/xsd/cmd-envelop.xsd http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1403526079380 https://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/1.x/profiles/clarin.eu:cr1:p_1403526079380/xsd">
   <cmd:Header>
      <!-- CMDI envelope header. MdSelfLink is the record handle with the
           "@format=cmdi" suffix; MdProfile carries the same profile ID that the
           cmdp namespace and xsi:schemaLocation on the root element refer to. -->
      <cmd:MdCreationDate>2023-06-29</cmd:MdCreationDate>
      <cmd:MdSelfLink>https://hdl.handle.net/11356/1328@format=cmdi</cmd:MdSelfLink>
      <cmd:MdProfile>clarin.eu:cr1:p_1403526079380</cmd:MdProfile>
      <cmd:MdCollectionDisplayName>CLARIN.SI data &amp; tools</cmd:MdCollectionDisplayName>
   </cmd:Header>
   <cmd:Resources>
      <!-- Resource proxies for this record: one landing page, one HTML project
           page, and two binary bitstreams that each carry an MD5 checksum in
           the lindat namespace. -->
      <cmd:ResourceProxyList>

         <!-- Landing page (record handle). -->
         <cmd:ResourceProxy id="lp_1863">
            <cmd:ResourceType>LandingPage</cmd:ResourceType>
            <cmd:ResourceRef>https://hdl.handle.net/11356/1328</cmd:ResourceRef>
         </cmd:ResourceProxy>

         <!-- Project page on GitHub. -->
         <cmd:ResourceProxy id="uri_1">
            <cmd:ResourceType mimetype="text/html">Resource</cmd:ResourceType>
            <cmd:ResourceRef>https://github.com/clarinsi/classla-stanfordnlp</cmd:ResourceRef>
         </cmd:ResourceProxy>

         <!-- Bitstream 1: BTB_ud. The URL is kept tight against the tags so no
              whitespace leaks into the reference value. -->
         <cmd:ResourceProxy id="_3457">
            <cmd:ResourceType mimetype="application/octet-stream">Resource</cmd:ResourceType>
            <cmd:ResourceRef
               xmlns:lindat="http://lindat.mff.cuni.cz/ns/experimental/cmdi"
               lindat:md5_checksum="b0198a347315e47c356bd7f8af68b567">https://www.clarin.si/repository/xmlui/bitstream/handle/11356/1328/BTB_ud?sequence=1</cmd:ResourceRef>
         </cmd:ResourceProxy>

         <!-- Bitstream 2: BTB_ud.pretrain.pt. -->
         <cmd:ResourceProxy id="_3458">
            <cmd:ResourceType mimetype="application/octet-stream">Resource</cmd:ResourceType>
            <cmd:ResourceRef
               xmlns:lindat="http://lindat.mff.cuni.cz/ns/experimental/cmdi"
               lindat:md5_checksum="54093d4962f7dbe48aba1dbddc836ffa">https://www.clarin.si/repository/xmlui/bitstream/handle/11356/1328/BTB_ud.pretrain.pt?sequence=2</cmd:ResourceRef>
         </cmd:ResourceProxy>

      </cmd:ResourceProxyList>
      <cmd:JournalFileProxyList/>
      <cmd:ResourceRelationList/>
   </cmd:Resources>
   <cmd:Components>
      <!-- LINDAT_CLARIN profile payload: bibliographic data, a description of
           the resource, and its licence. -->
      <cmdp:LINDAT_CLARIN>
         <cmdp:bibliographicInfo>
            <cmdp:projectUrl>https://github.com/clarinsi/classla-stanfordnlp</cmdp:projectUrl>
            <cmdp:titles>
               <cmdp:title xml:lang="en">The CLASSLA-StanfordNLP model for UD dependency parsing of standard Bulgarian 1.0</cmdp:title>
            </cmdp:titles>
            <!-- Name parts carry no surrounding whitespace, so they compare
                 equal to the same names elsewhere in the record (for example
                 contactPerson) without the consumer having to trim. -->
            <cmdp:authors>
               <cmdp:author>
                  <cmdp:lastName>Ljubešić</cmdp:lastName>
                  <cmdp:firstName>Nikola</cmdp:firstName>
               </cmdp:author>
               <cmdp:author>
                  <cmdp:lastName>Osenova</cmdp:lastName>
                  <cmdp:firstName>Petya</cmdp:firstName>
               </cmdp:author>
               <cmdp:author>
                  <cmdp:lastName>Simov</cmdp:lastName>
                  <cmdp:firstName>Kiril</cmdp:firstName>
               </cmdp:author>
            </cmdp:authors>
            <cmdp:dates>
               <cmdp:dateIssued>2020-06-24</cmdp:dateIssued>
            </cmdp:dates>
            <cmdp:identifiers>
               <cmdp:identifier type="Handle">https://hdl.handle.net/11356/1328</cmdp:identifier>
            </cmdp:identifiers>
            <cmdp:funds>
               <cmdp:funding>
                  <cmdp:organization>ARRS (Slovenian Research Agency)</cmdp:organization>
                  <cmdp:code>P6-0411</cmdp:code>
                  <cmdp:projectName>Language Resources and Technologies for Slovene</cmdp:projectName>
                  <cmdp:fundsType>nationalFunds</cmdp:fundsType>
               </cmdp:funding>
               <cmdp:funding>
                  <cmdp:organization>Ministry of Education and Science Republic of Bulgaria</cmdp:organization>
                  <cmdp:code>DO01-272/16.12.2019</cmdp:code>
                  <cmdp:projectName>Bulgarian National Interdisciplinary Research e-Infrastructure for Resources and Technologies CLaDA-BG</cmdp:projectName>
                  <cmdp:fundsType>nationalFunds</cmdp:fundsType>
               </cmdp:funding>
            </cmdp:funds>
            <cmdp:contactPerson>
               <cmdp:firstName>Nikola</cmdp:firstName>
               <cmdp:lastName>Ljubešić</cmdp:lastName>
               <cmdp:email>nikola.ljubesic@ijs.si</cmdp:email>
               <cmdp:affiliation>Jožef Stefan Institute</cmdp:affiliation>
            </cmdp:contactPerson>
            <cmdp:publishers>
               <cmdp:publisher>Jožef Stefan Institute</cmdp:publisher>
            </cmdp:publishers>
         </cmdp:bibliographicInfo>
         <cmdp:dataInfo>
            <cmdp:type>toolService</cmdp:type>
            <cmdp:detailedType>tool</cmdp:detailedType>
            <!-- Free-text description: kept on a single line so that
                 indentation whitespace does not become part of the value. -->
            <cmdp:description>The model for UD dependency parsing of standard Bulgarian was built with the CLASSLA-StanfordNLP tool (https://github.com/clarinsi/classla-stanfordnlp) by training on the UD-parsed portion of the BulTreeBank training corpus (http://hdl.handle.net/11495/D93F-C6E9-65D9-2) and using the CoNLL2017 word embeddings (http://hdl.handle.net/11234/1-1989). The estimated LAS of the parser is ~91.5.</cmdp:description>
            <cmdp:languages>
               <cmdp:language>
                  <cmdp:code>bul</cmdp:code>
                  <cmdp:name>Bulgarian</cmdp:name>
               </cmdp:language>
            </cmdp:languages>
            <cmdp:keywords>
               <cmdp:keyword>parsing</cmdp:keyword>
               <cmdp:keyword>language model</cmdp:keyword>
            </cmdp:keywords>
         </cmdp:dataInfo>
         <cmdp:licenseInfo>
            <cmdp:license>
               <cmdp:uri>https://creativecommons.org/licenses/by-sa/4.0/</cmdp:uri>
            </cmdp:license>
         </cmdp:licenseInfo>
      </cmdp:LINDAT_CLARIN>
   </cmd:Components>
</cmd:CMD>
Licence:
  • https://creativecommons.org/licenses/by-sa/4.0/

Resources:

Resource

text/html

Resource

application/octet-stream

Resource

application/octet-stream