Abstract |
This contribution aims to show the necessity of working on the development of multilingual corpora and appropriate tools for multilingual contrastive studies. We take the corpus of the lexicographical project COMBIDIGILEX as an example to show how difficult it is to build a suitable data basis to study and compare linguistic phenomena in German, Spanish and Portuguese. Despite the availability of large reference corpora for the three languages (at least for written language), it is not possible to obtain a comparable data basis from them, because the mentioned corpora are created according to different requirements and are also powered by disparate information systems and analysis tools. To break the status quo, we plead for expanding research infrastructures by means of compatible language technology and data sharing. |
BibTeX |
@inproceedings{euralex_mannheim_vergleichbare_2022,
  author     = {Meliss, Meike and González Ribao, Vanessa},
  editor     = {Klosa-Kückelhaus, Annette and Engelberg, Stefan and Möhrs, Christine and Storjohann, Petra},
  title      = {Vergleichbare {Korpora} für multilinguale kontrastive {Studien}. {Herausforderungen} und {Desiderata}},
  shorttitle = {Euralex (2022)},
  booktitle  = {Dictionaries and {Society}. {Proceedings} of the {XX} {EURALEX} {International} {Congress}},
  publisher  = {IDS-Verlag},
  address    = {Mannheim},
  year       = {2022},
  pages      = {253--261},
  isbn       = {978-3-937241-87-6},
  language   = {de},
} |