{ "id": "1605.06177", "version": "v1", "published": "2016-05-20T00:03:42.000Z", "updated": "2016-05-20T00:03:42.000Z", "title": "Fine-Grained Classification of Pedestrians in Video: Benchmark and State of the Art", "authors": [ "David Hall", "Pietro Perona" ], "comment": "CVPR 2015", "categories": [ "cs.CV" ], "abstract": "A video dataset that is designed to study fine-grained categorisation of pedestrians is introduced. Pedestrians were recorded \"in-the-wild\" from a moving vehicle. Annotations include bounding boxes, tracks, 14 keypoints with occlusion information and the fine-grained categories of age (5 classes), sex (2 classes), weight (3 classes) and clothing style (4 classes). There are a total of 27,454 bounding box and pose labels across 4222 tracks. This dataset is designed to train and test algorithms for fine-grained categorisation of people, it is also useful for benchmarking tracking, detection and pose estimation of pedestrians. State-of-the-art algorithms for fine-grained classification and pose estimation were tested using the dataset and the results are reported as a useful performance baseline.", "revisions": [ { "version": "v1", "updated": "2016-05-20T00:03:42.000Z" } ], "analyses": { "keywords": [ "fine-grained classification", "pedestrians", "pose estimation", "bounding box", "test algorithms" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }