% Conference paper entry from the JEP-TALN-RECITAL 2024 proceedings.
% The `note` field appears to carry the French rendering of the English title,
% and `keywords` mixes English and French terms as given by the source.
% NOTE(review): `url` is a relative path, presumably resolved against the
% proceedings site -- confirm the base location before relying on it.
@inproceedings{Herron:JEP-TALN:2024,
    author    = {Herron, Felix},
    title     = {An evaluation of current benchmarking strategies for {French} biomedical language models},
    booktitle = {Actes de JEP-TALN-RECITAL 2024. Actes de la 26{\`e}me Rencontre des {\'E}tudiants Chercheurs en Informatique pour le Traitement Automatique des Langues},
    month     = jul,
    year      = {2024},
    address   = {Toulouse, France},
    publisher = {Association pour le Traitement Automatique des Langues},
    pages     = {1--16},
    note      = {{\'E}valuation de benchmarking actuel pour des mod{\`e}les de langage biom{\'e}dicaux fran{\c{c}}ais},
    abstract  = {We describe the current state of benchmarking for French language biomedical natural language processing (NLP). We note two important criteria in biomedical benchmarking: first, that a biomedical benchmark clearly simulate a specific use cases, in order to offer a useful evaluation of a biomedical model's real life applicability. Second: that a biomedical benchmark be created in collaboration with biomedical professionals. We note that many biomedical benchmarks, particularly in French, do not adhere to these criteria; however, we highlight other biomedical benchmarks which adhere better to those criteria. Furthermore, we evaluate some of the most common French biomedical benchmarks on an array of models and empirically support the necessity of domain-specific and language-specific pre-training for natural language understanding (NLU) tasks. We show that some popular French biomedical language models perform poorly and/or inconsistently on important biomedical tasks. Finally, we advocate for an increase in publicly available, clinically targeted French biomedical NLU benchmarks.},
    keywords  = {Benchmarking,mod{\'e}lisation de langage biom{\'e}dicale,apprentissage profond},
    url       = {1711.pdf},
}
