{ "id": "1901.01341", "version": "v1", "published": "2019-01-04T23:44:47.000Z", "updated": "2019-01-04T23:44:47.000Z", "title": "Sheaves: A Topological Approach to Big Data", "authors": [ "Linas Vepstas" ], "comment": "49 pages, 24 figures", "categories": [ "cs.LG", "cs.DS", "cs.SC" ], "abstract": "This document develops general concepts useful for extracting knowledge embedded in large graphs or datasets that have pair-wise relationships, such as cause-effect-type relations. Almost no underlying assumptions are made, other than that the data can be presented in terms of pair-wise relationships between objects/events. This assumption is used to mine for patterns in the dataset, defining a reduced graph or dataset that boils-down or concentrates information into a more compact form. The resulting extracted structure or set of patterns are manifestly symbolic in nature, as they capture and encode the graph structure of the dataset in terms of a (generative) grammar. This structure is identified as having the formal mathematical structure of a sheaf. In essence, this paper introduces the basic concepts of sheaf theory into the domain of graphical datasets.", "revisions": [ { "version": "v1", "updated": "2019-01-04T23:44:47.000Z" } ], "analyses": { "keywords": [ "big data", "topological approach", "pair-wise relationships", "assumption", "general concepts" ], "note": { "typesetting": "TeX", "pages": 49, "language": "en", "license": "arXiv", "status": "editable" } } }