diff --git a/README.md b/README.md index 3e92275..5e030be 100644 --- a/README.md +++ b/README.md @@ -4,15 +4,15 @@ This is a repository of openly available hypergraph datasets in JSON format with ## Overview of the xgi-data format The xgi-data format for hypergraph data sets is a JSON data structure with the following structure: -* "hypergraph-data": This tag accesses the attributes of the entire hypergraph dataset such as the authors or dataset name. -* "node-data": This tag accesses the nodes of the hypergraph and their associated properties as a dictionary where the keys are node IDs and the corresponding values are dictionaries. If a node doesn't have any properties, the associated dictionary is empty. - * "name": This tag accesses the node's name if there is one that is different from the ID specified in the hyperedges. +* `hypergraph-data`: This tag accesses the attributes of the entire hypergraph dataset such as the authors or dataset name. +* `node-data`: This tag accesses the nodes of the hypergraph and their associated properties as a dictionary where the keys are node IDs and the corresponding values are dictionaries. If a node doesn't have any properties, the associated dictionary is empty. + * `name`: This tag accesses the node's name if there is one that is different from the ID specified in the hyperedges. * Other tags are user-specified based on the particular attributes provided by the dataset. -* "edge-data": This tag accesses the hyperedges of the hypergraph and their associated attributes. - * "name": This tag accesses the edge's name if one is provided. - * "timestamp": This is the tag specifying the time associated with the hyperedge if it is given. All times are stored in ISO8601 standard. +* `edge-data`: This tag accesses the hyperedges of the hypergraph and their associated attributes. + * `name`: This tag accesses the edge's name if one is provided. 
+ * `timestamp`: This is the tag specifying the time associated with the hyperedge if it is given. All times are stored in the ISO 8601 standard format. * Other tags are user-specified based on the particular attributes provided by the dataset. -* "edge-dict": This tag accesses the edge IDs and the corresponding nodes which participate in that hyperedge. +* `edge-dict`: This tag accesses the edge IDs and the corresponding nodes which participate in that hyperedge. All IDs are strings but can be converted to other types if desired. diff --git a/get_stats.ipynb b/get_stats.ipynb index 333d1ec..d95ce99 100644 --- a/get_stats.ipynb +++ b/get_stats.ipynb @@ -108,17 +108,17 @@ " data = json.load(file)\n", "\n", "props = {\n", - " \"num-nodes\": \"{:,d}\".format(num_nodes),\n", - " \"num-edges\": \"{:,d}\".format(num_edges),\n", - " \"num-unique-edges\": \"{:,d}\".format(num_unique_edges),\n", - " \"min-edge-size\": \"{:,d}\".format(smin),\n", - " \"max-edge-size\": \"{:,d}\".format(smax),\n", - " \"mean-edge-size\": \"{:,.2f}\".format(smean),\n", - " \"min-degree\": \"{:,d}\".format(dmin),\n", - " \"max-degree\": \"{:,d}\".format(dmax),\n", - " \"mean-degree\": \"{:,.2f}\".format(dmean),\n", - " \"num-components\": \"{:,d}\".format(num_components),\n", - " \"gc-size\": \"{:,d}\".format(size_gc),\n", + " \"num-nodes\": int(num_nodes),\n", + " \"num-edges\": int(num_edges),\n", + " \"num-unique-edges\": int(num_unique_edges),\n", + " \"min-edge-size\": int(smin),\n", + " \"max-edge-size\": int(smax),\n", + " \"mean-edge-size\": round(smean, 2),\n", + " \"min-degree\": int(dmin),\n", + " \"max-degree\": int(dmax),\n", + " \"mean-degree\": round(dmean, 2),\n", + " \"num-components\": int(num_components),\n", + " \"gc-size\": int(size_gc),\n", "}\n", "data[dataset_name].update(props)\n", "\n", diff --git a/index.json b/index.json index 4b20037..e8ae15a 100644 --- a/index.json +++ b/index.json @@ -1,380 +1,380 @@ { "coauth-dblp": { "url": 
"https://zenodo.org/records/10155873/files/coauth-DBLP.json", - "num-nodes": "1,930,378", - "num-edges": "3,700,681", - "num-unique-edges": "2,599,087", - "min-edge-size": "1", - "max-edge-size": "280", - "mean-edge-size": "2.79", - "min-degree": "1", - "max-degree": "1,425", - "mean-degree": "5.35", - "num-components": "154,294", - "gc-size": "1,659,954" + "num-nodes": 1930378, + "num-edges": 3700681, + "num-unique-edges": 2599087, + "min-edge-size": 1, + "max-edge-size": 280, + "mean-edge-size": 2.79, + "min-degree": 1, + "max-degree": 1425, + "mean-degree": 5.35, + "num-components": 154294, + "gc-size": 1659954 }, "coauth-mag-geology": { "url": "https://zenodo.org/records/10928443/files/coauth-MAG-Geology.json", - "num-nodes": "1,261,129", - "num-edges": "1,591,166", - "num-unique-edges": "1,204,704", - "min-edge-size": "1", - "max-edge-size": "284", - "mean-edge-size": "2.80", - "min-degree": "1", - "max-degree": "1,153", - "mean-degree": "3.53", - "num-components": "52", - "gc-size": "903,973" + "num-nodes": 1261129, + "num-edges": 1591166, + "num-unique-edges": 1204704, + "min-edge-size": 1, + "max-edge-size": 284, + "mean-edge-size": 2.80, + "min-degree": 1, + "max-degree": 1153, + "mean-degree": 3.53, + "num-components": 52, + "gc-size": 903973 }, "coauth-mag-history": { "url": "https://zenodo.org/records/10928551/files/coauth-MAG-History.json", - "num-nodes": "1,034,876", - "num-edges": "1,813,147", - "num-unique-edges": "896,062", - "min-edge-size": "1", - "max-edge-size": "925", - "mean-edge-size": "1.32", - "min-degree": "1", - "max-degree": "4,348", - "mean-degree": "2.32", - "num-components": "75", - "gc-size": "242,048" + "num-nodes": 1034876, + "num-edges": 1813147, + "num-unique-edges": 896062, + "min-edge-size": 1, + "max-edge-size": 925, + "mean-edge-size": 1.32, + "min-degree": 1, + "max-degree": 4348, + "mean-degree": 2.32, + "num-components": 75, + "gc-size": 242048 }, "congress-bills": { "url": 
"https://zenodo.org/records/10928561/files/congress-bills.json", - "num-nodes": "1,718", - "num-edges": "282,049", - "num-unique-edges": "105,733", - "min-edge-size": "1", - "max-edge-size": "400", - "mean-edge-size": "8.66", - "min-degree": "3", - "max-degree": "9,375", - "mean-degree": "1,421.30", - "num-components": "1", - "gc-size": "1,718" + "num-nodes": 1718, + "num-edges": 282049, + "num-unique-edges": 105733, + "min-edge-size": 1, + "max-edge-size": 400, + "mean-edge-size": 8.66, + "min-degree": 3, + "max-degree": 9375, + "mean-degree": 1421.30, + "num-components": 1, + "gc-size": 1718 }, "contact-high-school": { "url": "https://zenodo.org/records/10155802/files/contact-high-school.json", - "num-nodes": "327", - "num-edges": "172,035", - "num-unique-edges": "7,818", - "min-edge-size": "2", - "max-edge-size": "5", - "mean-edge-size": "2.05", - "min-degree": "7", - "max-degree": "4,495", - "mean-degree": "1,078.65", - "num-components": "1", - "gc-size": "327" + "num-nodes": 327, + "num-edges": 172035, + "num-unique-edges": 7818, + "min-edge-size": 2, + "max-edge-size": 5, + "mean-edge-size": 2.05, + "min-degree": 7, + "max-degree": 4495, + "mean-degree": 1078.65, + "num-components": 1, + "gc-size": 327 }, "contact-primary-school": { "url": "https://zenodo.org/records/10155810/files/contact-primary-school.json", - "num-nodes": "242", - "num-edges": "106,879", - "num-unique-edges": "12,704", - "min-edge-size": "2", - "max-edge-size": "5", - "mean-edge-size": "2.1", - "min-degree": "125", - "max-degree": "2,234", - "mean-degree": "925.61", - "num-components": "1", - "gc-size": "242" + "num-nodes": 242, + "num-edges": 106879, + "num-unique-edges": 12704, + "min-edge-size": 2, + "max-edge-size": 5, + "mean-edge-size": 2.1, + "min-degree": 125, + "max-degree": 2234, + "mean-degree": 925.61, + "num-components": 1, + "gc-size": 242 }, "dawn": { "url": "https://zenodo.org/records/10155779/files/dawn.json", - "num-nodes": "2,558", - "num-edges": "2,272,433", - 
"num-unique-edges": "141,087", - "min-edge-size": "1", - "max-edge-size": "16", - "mean-edge-size": "1.58", - "min-degree": "1", - "max-degree": "479,242", - "mean-degree": "1406.1", - "num-components": "269", - "gc-size": "2,290" + "num-nodes": 2558, + "num-edges": 2272433, + "num-unique-edges": 141087, + "min-edge-size": 1, + "max-edge-size": 16, + "mean-edge-size": 1.58, + "min-degree": 1, + "max-degree": 479242, + "mean-degree": 1406.1, + "num-components": 269, + "gc-size": 2290 }, "diseasome": { "url": "https://zenodo.org/records/10155812/files/diseasome.json", - "num-nodes": "516", - "num-edges": "903", - "num-unique-edges": "481", - "min-edge-size": "1", - "max-edge-size": "11", - "mean-edge-size": "1.72", - "min-degree": "1", - "max-degree": "41", - "mean-degree": "3.0", - "num-components": "1", - "gc-size": "516" + "num-nodes": 516, + "num-edges": 903, + "num-unique-edges": 481, + "min-edge-size": 1, + "max-edge-size": 11, + "mean-edge-size": 1.72, + "min-degree": 1, + "max-degree": 41, + "mean-degree": 3.0, + "num-components": 1, + "gc-size": 516 }, "disgenenet": { "url": "https://zenodo.org/records/10155817/files/disgenenet.json", - "num-nodes": "12,368", - "num-edges": "2,261", - "num-unique-edges": "2,069", - "min-edge-size": "1", - "max-edge-size": "2,453", - "mean-edge-size": "50.23", - "min-degree": "1", - "max-degree": "382", - "mean-degree": "9.18", - "num-components": "1", - "gc-size": "12,368" + "num-nodes": 12368, + "num-edges": 2261, + "num-unique-edges": 2069, + "min-edge-size": 1, + "max-edge-size": 2453, + "mean-edge-size": 50.23, + "min-degree": 1, + "max-degree": 382, + "mean-degree": 9.18, + "num-components": 1, + "gc-size": 12368 }, "email-enron": { "url": "https://zenodo.org/records/10155819/files/email-enron.json", - "num-nodes": "148", - "num-edges": "10,885", - "num-unique-edges": "1,514", - "min-edge-size": "1", - "max-edge-size": "37", - "mean-edge-size": "2.47", - "min-degree": "0", - "max-degree": "1,327", - "mean-degree": 
"181.85", - "num-components": "6", - "gc-size": "143" + "num-nodes": 148, + "num-edges": 10885, + "num-unique-edges": 1514, + "min-edge-size": 1, + "max-edge-size": 37, + "mean-edge-size": 2.47, + "min-degree": 0, + "max-degree": 1327, + "mean-degree": 181.85, + "num-components": 6, + "gc-size": 143 }, "email-eu": { "url": "https://zenodo.org/records/10155823/files/email-eu.json", - "num-nodes": "1,005", - "num-edges": "235,263", - "num-unique-edges": "25,148", - "min-edge-size": "1", - "max-edge-size": "40", - "mean-edge-size": "2.39", - "min-degree": "1", - "max-degree": "8,664", - "mean-degree": "559.8", - "num-components": "20", - "gc-size": "986" + "num-nodes": 1005, + "num-edges": 235263, + "num-unique-edges": 25148, + "min-edge-size": 1, + "max-edge-size": 40, + "mean-edge-size": 2.39, + "min-degree": 1, + "max-degree": 8664, + "mean-degree": 559.8, + "num-components": 20, + "gc-size": 986 }, "hospital-lyon": { "url": "https://zenodo.org/records/10155825/files/hospital-lyon.json", - "num-nodes": "75", - "num-edges": "27,834", - "num-unique-edges": "1,824", - "min-edge-size": "2", - "max-edge-size": "5", - "mean-edge-size": "2.09", - "min-degree": "10", - "max-degree": "3,603", - "mean-degree": "774.68", - "num-components": "1", - "gc-size": "75" + "num-nodes": 75, + "num-edges": 27834, + "num-unique-edges": 1824, + "min-edge-size": 2, + "max-edge-size": 5, + "mean-edge-size": 2.09, + "min-degree": 10, + "max-degree": 3603, + "mean-degree": 774.68, + "num-components": 1, + "gc-size": 75 }, "hypertext-conference": { "url": "https://zenodo.org/records/10206136/files/hypertext-conference.json", - "num-nodes": "113", - "num-edges": "19,036", - "num-unique-edges": "2,434", - "min-edge-size": "2", - "max-edge-size": "6", - "mean-edge-size": "2.05", - "min-degree": "2", - "max-degree": "1,446", - "mean-degree": "345.56", - "num-components": "1", - "gc-size": "113" + "num-nodes": 113, + "num-edges": 19036, + "num-unique-edges": 2434, + "min-edge-size": 2, + 
"max-edge-size": 6, + "mean-edge-size": 2.05, + "min-degree": 2, + "max-degree": 1446, + "mean-degree": 345.56, + "num-components": 1, + "gc-size": 113 }, "invs13": { "url": "https://zenodo.org/records/10206151/files/InVS13.json", - "num-nodes": "92", - "num-edges": "9,644", - "num-unique-edges": "787", - "min-edge-size": "2", - "max-edge-size": "4", - "mean-edge-size": "2.01", - "min-degree": "5", - "max-degree": "1,089", - "mean-degree": "210.65", - "num-components": "1", - "gc-size": "92" + "num-nodes": 92, + "num-edges": 9644, + "num-unique-edges": 787, + "min-edge-size": 2, + "max-edge-size": 4, + "mean-edge-size": 2.01, + "min-degree": 5, + "max-degree": 1089, + "mean-degree": 210.65, + "num-components": 1, + "gc-size": 92 }, "invs15": { "url": "https://zenodo.org/records/10206154/files/InVS15.json", - "num-nodes": "232", - "num-edges": "73,822", - "num-unique-edges": "4,909", - "min-edge-size": "2", - "max-edge-size": "4", - "mean-edge-size": "2.03", - "min-degree": "0", - "max-degree": "3,192", - "mean-degree": "646.33", - "num-components": "16", - "gc-size": "217" + "num-nodes": 232, + "num-edges": 73822, + "num-unique-edges": 4909, + "min-edge-size": 2, + "max-edge-size": 4, + "mean-edge-size": 2.03, + "min-degree": 0, + "max-degree": 3192, + "mean-degree": 646.33, + "num-components": 16, + "gc-size": 217 }, "kaggle-whats-cooking": { "url": "https://zenodo.org/records/10157609/files/kaggle-whats-cooking.json", - "num-nodes": "6,714", - "num-edges": "39,774", - "num-unique-edges": "39,243", - "min-edge-size": "1", - "max-edge-size": "65", - "mean-edge-size": "10.77", - "min-degree": "1", - "max-degree": "18,048", - "mean-degree": "63.78", - "num-components": "1", - "gc-size": "6,714" + "num-nodes": 6714, + "num-edges": 39774, + "num-unique-edges": 39243, + "min-edge-size": 1, + "max-edge-size": 65, + "mean-edge-size": 10.77, + "min-degree": 1, + "max-degree": 18048, + "mean-degree": 63.78, + "num-components": 1, + "gc-size": 6714 }, "malawi-village": { 
"url": "https://zenodo.org/records/10206147/files/malawi-village.json", - "num-nodes": "86", - "num-edges": "99,942", - "num-unique-edges": "432", - "min-edge-size": "2", - "max-edge-size": "4", - "mean-edge-size": "2.01", - "min-degree": "12", - "max-degree": "7,636", - "mean-degree": "2,338.01", - "num-components": "2", - "gc-size": "84" + "num-nodes": 86, + "num-edges": 99942, + "num-unique-edges": 432, + "min-edge-size": 2, + "max-edge-size": 4, + "mean-edge-size": 2.01, + "min-degree": 12, + "max-degree": 7636, + "mean-degree": 2338.01, + "num-components": 2, + "gc-size": 84 }, "ndc-classes": { "url": "https://zenodo.org/records/10155772/files/ndc-classes.json", - "num-nodes": "1,161", - "num-edges": "49,726", - "num-unique-edges": "1,090", - "min-edge-size": "1", - "max-edge-size": "39", - "mean-edge-size": "3.14", - "min-degree": "1", - "max-degree": "5,358", - "mean-degree": "134.58", - "num-components": "183", - "gc-size": "628" + "num-nodes": 1161, + "num-edges": 49726, + "num-unique-edges": 1090, + "min-edge-size": 1, + "max-edge-size": 39, + "mean-edge-size": 3.14, + "min-degree": 1, + "max-degree": 5358, + "mean-degree": 134.58, + "num-components": 183, + "gc-size": 628 }, "ndc-substances": { "url": "https://zenodo.org/records/10929019/files/NDC-substances.json", - "num-nodes": "5,556", - "num-edges": "112,919", - "num-unique-edges": "10,273", - "min-edge-size": "1", - "max-edge-size": "187", - "mean-edge-size": "2.01", - "min-degree": "1", - "max-degree": "6,693", - "mean-degree": "40.95", - "num-components": "13", - "gc-size": "3,414" + "num-nodes": 5556, + "num-edges": 112919, + "num-unique-edges": 10273, + "min-edge-size": 1, + "max-edge-size": 187, + "mean-edge-size": 2.01, + "min-degree": 1, + "max-degree": 6693, + "mean-degree": 40.95, + "num-components": 13, + "gc-size": 3414 }, "science-gallery": { "url": "https://zenodo.org/records/10206142/files/science-gallery.json", - "num-nodes": "10,972", - "num-edges": "338,765", - "num-unique-edges": 
"52,706", - "min-edge-size": "2", - "max-edge-size": "5", - "mean-edge-size": "2.12", - "min-degree": "1", - "max-degree": "486", - "mean-degree": "65.41", - "num-components": "304", - "gc-size": "410" + "num-nodes": 10972, + "num-edges": 338765, + "num-unique-edges": 52706, + "min-edge-size": 2, + "max-edge-size": 5, + "mean-edge-size": 2.12, + "min-degree": 1, + "max-degree": 486, + "mean-degree": 65.41, + "num-components": 304, + "gc-size": 410 }, "sfhh-conference": { "url": "https://zenodo.org/records/10198859/files/sfhh-conference.json", - "num-nodes": "403", - "num-edges": "54,305", - "num-unique-edges": "10,541", - "min-edge-size": "2", - "max-edge-size": "9", - "mean-edge-size": "2.15", - "min-degree": "2", - "max-degree": "1,960", - "mean-degree": "289.42", - "num-components": "1", - "gc-size": "403" + "num-nodes": 403, + "num-edges": 54305, + "num-unique-edges": 10541, + "min-edge-size": 2, + "max-edge-size": 9, + "mean-edge-size": 2.15, + "min-degree": 2, + "max-degree": 1960, + "mean-degree": 289.42, + "num-components": 1, + "gc-size": 403 }, "tags-ask-ubuntu": { "url": "https://zenodo.org/records/10155835/files/tags-ask-ubuntu.json", - "num-nodes": "3,029", - "num-edges": "271,233", - "num-unique-edges": "147,222", - "min-edge-size": "1", - "max-edge-size": "5", - "mean-edge-size": "2.71", - "min-degree": "1", - "max-degree": "21,004", - "mean-degree": "242.42", - "num-components": "9", - "gc-size": "3021" + "num-nodes": 3029, + "num-edges": 271233, + "num-unique-edges": 147222, + "min-edge-size": 1, + "max-edge-size": 5, + "mean-edge-size": 2.71, + "min-degree": 1, + "max-degree": 21004, + "mean-degree": 242.42, + "num-components": 9, + "gc-size": 3021 }, "tags-math-sx": { "url": "https://zenodo.org/records/10155845/files/tags-math-sx.json", - "num-nodes": "1,629", - "num-edges": "822,059", - "num-unique-edges": "170,476", - "min-edge-size": "1", - "max-edge-size": "5", - "mean-edge-size": "2.19", - "min-degree": "1", - "max-degree": "71,046", - 
"mean-degree": "1,105.98", - "num-components": "3", - "gc-size": "1,627" + "num-nodes": 1629, + "num-edges": 822059, + "num-unique-edges": 170476, + "min-edge-size": 1, + "max-edge-size": 5, + "mean-edge-size": 2.19, + "min-degree": 1, + "max-degree": 71046, + "mean-degree": 1105.98, + "num-components": 3, + "gc-size": 1627 }, "tags-stack-overflow": { "url": "https://zenodo.org/records/10155885/files/tags-stack-overflow.json", - "num-nodes": "49,998", - "num-edges": "14,458,875", - "num-unique-edges": "5,537,637", - "min-edge-size": "1", - "max-edge-size": "5", - "mean-edge-size": "2.97", - "min-degree": "1", - "max-degree": "1,457,906", - "mean-degree": "858.45", - "num-components": "61", - "gc-size": "49,931" + "num-nodes": 49998, + "num-edges": 14458875, + "num-unique-edges": 5537637, + "min-edge-size": 1, + "max-edge-size": 5, + "mean-edge-size": 2.97, + "min-degree": 1, + "max-degree": 1457906, + "mean-degree": 858.45, + "num-components": 61, + "gc-size": 49931 }, "threads-ask-ubuntu": { "url": "https://zenodo.org/records/10373311/files/threads-ask-ubuntu.json", - "num-nodes": "125,602", - "num-edges": "192,947", - "num-unique-edges": "166,999", - "min-edge-size": "1", - "max-edge-size": "14", - "mean-edge-size": "1.8", - "min-degree": "1", - "max-degree": "2,332", - "mean-degree": "2.76", - "num-components": "39,187", - "gc-size": "82,075" + "num-nodes": 125602, + "num-edges": 192947, + "num-unique-edges": 166999, + "min-edge-size": 1, + "max-edge-size": 14, + "mean-edge-size": 1.8, + "min-degree": 1, + "max-degree": 2332, + "mean-degree": 2.76, + "num-components": 39187, + "gc-size": 82075 }, "threads-math-sx": { "url": "https://zenodo.org/records/10373324/files/threads-math-sx.json", - "num-nodes": "176,445", - "num-edges": "719,792", - "num-unique-edges": "595,749", - "min-edge-size": "1", - "max-edge-size": "21", - "mean-edge-size": "2.24", - "min-degree": "1", - "max-degree": "12,511", - "mean-degree": "9.13", - "num-components": "23,181", - "gc-size": 
"152,702" + "num-nodes": 176445, + "num-edges": 719792, + "num-unique-edges": 595749, + "min-edge-size": 1, + "max-edge-size": 21, + "mean-edge-size": 2.24, + "min-degree": 1, + "max-degree": 12511, + "mean-degree": 9.13, + "num-components": 23181, + "gc-size": 152702 }, "threads-stack-overflow": { "url": "https://zenodo.org/records/10373328/files/threads-stack-overflow.json", - "num-nodes": "2,675,969", - "num-edges": "11,305,356", - "num-unique-edges": "9,705,575", - "min-edge-size": "1", - "max-edge-size": "67", - "mean-edge-size": "2.26", - "min-degree": "1", - "max-degree": "36,365", - "mean-degree": "9.56", - "num-components": "364,286", - "gc-size": "2,301,086" + "num-nodes": 2675969, + "num-edges": 11305356, + "num-unique-edges": 9705575, + "min-edge-size": 1, + "max-edge-size": 67, + "mean-edge-size": 2.26, + "min-degree": 1, + "max-degree": 36365, + "mean-degree": 9.56, + "num-components": 364286, + "gc-size": 2301086 } } \ No newline at end of file