Abstract |
Estimating the quality of lexical data automatically linked on the sense level is challenging, as the quality of the predicted sense links can differ significantly across various datasets. This variability is especially problematic when quality estimation is limited to general statements about an extensive collection of sense pairs, such as the links between two entire dictionaries. We argue that estimating probabilities for individual sense pairs is a superior method for quality estimation for two reasons: Firstly, it allows us to draw more nuanced conclusions about the quality of linked lexical data. Secondly, it opens the door for merging automated with manual means of sense linking by pointing lexicographers towards sense pairs that are especially difficult to classify. We propose a method for generating such probability estimates for a supervised machine learning approach. We show that these probabilities successfully dissect the sense pairs based on the certainty of the classification algorithm, thereby enabling lexicographers to analyse and improve the quality of automatically linked lexical data effectively. |
BibTeX |
% Conference paper in Vol. 1 of the 19th EURALEX congress proceedings.
% Braces in title/booktitle protect only the acronym and proper nouns, so the
% bibliography style stays free to apply its own casing to the other words.
% NOTE(review): year is 2020 while booktitle dates the congress to 2021; confirm
% against the imprint of the volume before changing either value.
@inproceedings{ELX2020_2021-010,
  author    = {Grosse, Julian and Saur{\'\i}, Roser},
  title     = {Principled Quality Estimation for Dictionary Sense Linking},
  booktitle = {Lexicography for Inclusion: Proceedings of the 19th {EURALEX} International Congress, 7--9 {September} 2021, {Alexandroupolis}, Vol. 1},
  editor    = {Gavriilidou, Zoe and Mitsiaki, Maria and Fliatouras, Asimakis},
  publisher = {Democritus University of Thrace},
  address   = {Alexandroupolis},
  year      = {2020},
  pages     = {101--108},
  isbn      = {978-618-85138-1-5},
  language  = {en},
  url       = {https://www.euralex.org/elx_proceedings/Euralex2020-2021/EURALEX2020-2021_Vol1-p101-108.pdf},
} |