@comment{
  Conference paper in the ICMV 2024 proceedings (SPIE).
  The citation key is the exporter-generated identifier and is kept
  unchanged so existing citations continue to resolve.
  Percent signs in the abstract are escaped so the field can be typeset
  by styles that print abstracts.
}
@inproceedings{224f1298327f494f8089d669aff4f061,
  author    = {Zhang, Yiling and Akdag, Erkut and Bondarev, Egor and de With, Peter H. N.},
  title     = {{MTFL}: Multi-Timescale Feature Learning for Weakly-supervised Anomaly Detection in Surveillance Videos},
  booktitle = {Seventeenth International Conference on Machine Vision, {ICMV} 2024},
  editor    = {Osten, Wolfgang},
  series    = {Proceedings of {SPIE} - The International Society for Optical Engineering},
  publisher = {SPIE},
  address   = {United States},
  year      = {2025},
  month     = feb,
  day       = {24},
  isbn      = {9781510688278},
  doi       = {10.1117/12.3055069},
  language  = {English},
  keywords  = {Anomaly detection, Surveillance videos, Video understanding},
  note      = {17th International Conference on Machine Vision, ICMV 2024 ; Conference date: 10-10-2024 Through 13-10-2024},
  abstract  = {Detection of anomaly events is relevant for public safety and requires a combination of fine-grained motion information and contextual events at variable time-scales. To this end, we propose a Multi-Timescale Feature Learning (MTFL) method to enhance the representation of anomaly features. Short, medium, and long temporal tubelets are employed to extract spatio-temporal video features using a Video Swin Transformer. Experimental results demonstrate that MTFL outperforms state-of-the-art methods on the UCF-Crime dataset, achieving an anomaly detection performance 89.78\% AUC. Moreover, it performs complementary to SotA with 95.32\% AUC on the ShanghaiTech and 84.57\% AP on the XD-Violence dataset. Furthermore, we generate an extended dataset of the UCF-Crime for development and evaluation on a wider range of anomalies, namely Video Anomaly Detection Dataset (VADD), involving 2,591 videos in 18 classes with extensive coverage of realistic anomalies.},
}