% Dereza (2016): conference paper on a dictionary-based lemmatizer for Old Irish,
% from the CLTW volume of the JEP-TALN-RECITAL 2016 proceedings.
% The note field carries a French-language rendering of the title.
@inproceedings{Dereza:CLTW:2016,
    author    = {Dereza, Oksana},
    title     = {Building a Dictionary-Based Lemmatizer for {Old Irish}},
    booktitle = {Actes de la conf{\'e}rence conjointe {JEP-TALN-RECITAL} 2016. Volume 6 : {CLTW}},
    month     = jul,
    year      = {2016},
    address   = {Paris, France},
    publisher = {Association pour le Traitement Automatique des Langues},
    pages     = {12--17},
    note      = {Le d{\'e}veloppement d'un programme de lemmatisation pour le vieil irlandais},
    abstract  = {This paper explores the problem of developing NLP tools for morphologically rich and orthographically inconsistent classical languages. It is a case study of building a lemmatizer for Old Irish using only a dictionary and an unlabeled corpus as sources of data. At the current stage, the lemmatizer shows 76.31\% average recall score on a corpus of ca. 100,000 tokens and is able to predict lemmas for out-of-vocabulary words. However, as it is the work in progress, the lemmatizer lacks some functionality such as disambiguation. There is no gold standard to measure accuracy yet either.},
    keywords  = {lemmatisation, lemma, Old Irish, Middle Irish, Damerau-Levenshtein distance, unlabelled data, automatic morphological analysis},
    url       = {http://talnarchives.atala.org/ateliers/2016/CLTW/2.pdf}
}
