Abstract |
The objective of this paper is to demonstrate that corpus-driven bilingual dictionaries generated fully by automatic means are suitable for human use. Previous experiments have proven that bilingual resources can be created by applying word alignment to parallel corpora and that such resources are useful for bilingual dictionary compilation purposes. Moreover, the corpus-driven nature of the method yields several advantages over more traditional approaches. Most importantly, the exploitation of parallel corpora decreases the reliance on human intuition during dictionary building. However, the proposed technique has to face some difficulties as well. First, the scarce availability of parallel texts for medium-density languages imposes limitations on the size of the resulting dictionary. Secondly, the resulting bilingual resource is not completely clean: that is, wrong translation candidates are also included in the dictionary. In fact, there is a tight correlation between the proportion of wrong candidates and the size of the resulting resource. Our objective is to design and implement a dictionary query system that is apt to exploit the additional benefits of the dictionary building method and overcome its disadvantages. |
BibTeX |
@InProceedings{ELX12-033, author = {Héja, Enikő and Takács, Dávid}, title = {An Online Dictionary Browser for Automatically Generated Bilingual Dictionaries}, pages = {468--477}, booktitle = {Proceedings of the 15th {EURALEX} International Congress}, year = {2012}, month = aug, eventdate = {2012-08-07/2012-08-11}, address = {Oslo, Norway}, editor = {Fjeld, Ruth Vatvedt and Torjusen, Julie Matilde}, publisher = {Department of Linguistics and Scandinavian Studies, University of Oslo}, isbn = {978-82-303-2228-4}, } |