@comment{
  Conference paper in the CHR 2024 proceedings (CEUR Workshop Proceedings, vol. 3834).
  Typed as inproceedings so that booktitle, series and volume are rendered by standard styles.
  The day and language fields are not standard BibTeX fields; they are retained as metadata
  and are ignored by styles that do not know them.
}
@inproceedings{cf9b6271087240ee9e497d82a3292d29,
  author    = {Schnober, Carsten and Smit, Renate and Kuruppath, Manjusha and Pepping, Kay and van Wissen, Leon and Petram, Lodewijk},
  title     = {Page Embeddings: Extracting and Classifying Historical Documents with Generic Vector Representations},
  booktitle = {Proceedings of the Computational Humanities Research Conference 2024},
  series    = {CEUR Workshop Proceedings},
  volume    = {3834},
  pages     = {999--1011},
  publisher = {CEUR-WS.org},
  year      = {2024},
  month     = nov,
  day       = {18},
  language  = {English},
  keywords  = {Natural Language Processing, Sequence Tagging, Document Metadata Enhancement, Machine Learning},
  abstract  = {We propose a neural network architecture designed to generate region and page embeddings for boundary detection and classification of documents within a large and heterogeneous historical archive. Our approach is versatile and can be applied to other tasks and datasets. This method enhances the accessibility of historical archives and promotes a more inclusive utilization of historical materials.},
}