# Build the `wiki` graph dataset from the raw wiki CSV exports.
#
# Steps: read and combine the raw exports, clean column names, derive a node
# table and an edge table, assemble them into a tidygraph `tbl_graph`, add
# centrality / community measures, store the graph as package data and export
# a JSON file for the 3d visualisation.
#
# This script relies on `get_decimal_coordinates()` and `cities`, which are not
# defined in this file -- presumably they come from this package; make sure
# they are available (e.g. via `devtools::load_all()`) before running.

library(tidyverse)
library(tidygraph)

# read raw data ----
wiki <- readr::read_csv("data-raw/result.csv")
wiki_2 <- readr::read_csv("data-raw/result_2.csv")
# wiki_3 <- readr::read_csv("data-raw/result_3.csv")

wiki <- wiki |>
  add_row(wiki_2) # |>
  # add_row(wiki_3)

# wiki <- readr::read_csv("https://www.securityvision.io/wiki/index.php/Special:Ask/format%3Dcsv/limit%3D999999/link%3Dall/headers%3Dshow/searchlabel%3DCSV/class%3Dsortable-20wikitable-20smwtable/order%3Dasc/sort%3D/offset%3D0/-5B-5BCategory:Deployments-7C-7CInstitution-7C-7CDataset-7C-7CPerson-7C-7CTechnology-5D-5D/-3FCategory/-3FClients/-3FManaged-20by/-3FUsed-20by/-3FFunded-20by/-3FProvided-20by/-3FSoftware-20Deployed/-3FDatasets-20Used/-3FRelated-20Institutions/-3FIs-20Department-20Of/-3FInvolved-20Entities/mainlabel%3D/prettyprint%3Dtrue/unescape%3Dtrue")

# variable names ----
wiki <- wiki |>
  janitor::clean_names() |>
  dplyr::rename(name = x1)

# adjust spelling mistake
wiki <- wiki |>
  rename(
    developed_by_people = developped_by_people,
    developed_by_institution = developped_by_institutions
  )

# add a category variable ----
# columns whose values name other entities; each becomes an edge type below
entity_columns <- c(
  "name",
  "managed_by",
  "used_by",
  "software_deployed",
  "datasets_used",
  "related_institutions",
  "developed_by_institution",
  "developed_by_people",
  "institution_type"
)

wiki <- wiki |>
  dplyr::mutate(category = stringr::str_remove(category, "Category:")) |>
  dplyr::mutate(dplyr::across(where(is.character), tolower)) |>
  # protect the comma in "deployment," so it is not treated as a value
  # separator when the entity columns are split on "," further down
  # NOTE(review): str_replace() only rewrites the first occurrence per cell;
  # confirm whether str_replace_all() is needed for multi-valued cells.
  dplyr::mutate(
    across(
      all_of(entity_columns),
      \(x) stringr::str_replace(x, "deployment,", "deployment:")
    )
  )

# add variables ----
wiki <- wiki |>
  rowid_to_column(var = "id") |>
  # str_detect() already returns TRUE / FALSE / NA, no if_else() needed
  mutate(unknown = stringr::str_detect(name, "unknown"))

# make nodes dataset ----
wiki_nodes <- wiki |>
  select(id, name, category, institution_type, geolocation, city, unknown)

# make edges dataset ----
# One row per (source row, entity column, referenced name); the referenced
# name is then looked up in the node table to obtain the target id.
# NOTE(review): values are split on "," without trimming whitespace; if the
# export separates values with ", " only the first value will match a node.
wiki_edges <- wiki |>
  tidyr::pivot_longer(
    cols = all_of(entity_columns),
    names_to = "edge_type",
    values_to = "name"
  ) |>
  select(id, name, edge_type, category) |>
  separate_rows(name, sep = ",") |>
  left_join(
    wiki_nodes |>
      select(id, name),
    by = "name"
  ) |>
  rename(from = id.x, to = id.y) |>
  select(from, to, edge_type)

# drop unresolved references and self-loops (the pivoted `name` column always
# points back at its own row)
wiki_edges <- wiki_edges |>
  filter(!is.na(from) & !is.na(to)) |>
  filter(from != to) |>
  distinct()

# nodes values ----
wiki_nodes <- wiki_nodes |>
  mutate(category = if_else(category == "deployments", "deployment", category))

# add geolocation
wiki_nodes <- wiki_nodes |>
  get_decimal_coordinates(geolocation)

# if row has no geocoordinates but has city, add the geocoordinates for the city
# NOTE(review): edges refer to nodes by row position, so `cities` must have at
# most one row per city or this join will shift the node order -- confirm.
wiki_nodes <- wiki_nodes |>
  left_join(cities, by = "city") |>
  mutate(
    latitude = if_else(is.na(latitude.x), latitude.y, latitude.x),
    longitude = if_else(is.na(longitude.x), longitude.y, longitude.x)
  ) |>
  select(-latitude.x, -latitude.y, -longitude.x, -longitude.y)

# edges values ----
# Fix: the column was renamed to `developed_by_institution` (singular) above,
# so the previous check against "developed_by_institutions" never matched and
# institution edges were not collapsed into "developed_by".
wiki_edges <- wiki_edges |>
  dplyr::mutate(
    edge_type = case_when(
      edge_type %in% c("developed_by_institution", "developed_by_people") ~ "developed_by",
      TRUE ~ edge_type
    )
  )

# bring together ----
wiki <- tidygraph::tbl_graph(wiki_nodes, wiki_edges, directed = FALSE)

# add importance measure ----
# nodes
# Fix: `category` was recoded from "deployments" to "deployment" above, so the
# size boost has to test for the recoded value; the old test never matched.
wiki <- wiki |>
  activate(nodes) |>
  mutate(
    connectivity = tidygraph::centrality_betweenness(),
    connectivity_normalised = 1 + (connectivity - min(connectivity)) / sd(connectivity),
    size = connectivity_normalised * if_else(category == "deployment", 2, 1)
  )

# add uncertainty between edges
# NOTE(review): edge_certainty is random and no seed is set, so the stored
# dataset differs on every run -- confirm this is intended.
wiki <- wiki |>
  activate(edges) |>
  mutate(
    edge_importance = centrality_edge_betweenness(),
    edge_importance_normalised = 1 + (edge_importance - mean(edge_importance)) / sd(edge_importance),
    edge_certainty = runif(n = n(), min = 0.2, max = 0.8),
    edge_certainty_twenty = edge_certainty * 20
  )

# community detection ----
wiki <- wiki |>
  activate(nodes) |>
  mutate(community = as.factor(group_infomap()))

# add labels to edges on what they contain
# wiki |>
#   activate(edges) |>
#   mutate(contains_deployment = edge_is_from(121))

wiki_nodes <- wiki |>
  activate(nodes) |>
  as_tibble()

wiki_edges <- wiki |>
  activate(edges) |>
  as_tibble()

# add to package ----
usethis::use_data(wiki, overwrite = TRUE)

# see as tibble for checking
wiki_tibble <- wiki |>
  as_tibble()

# title-case names for display only; the packaged graph keeps lower-case names
wiki_nodes <- wiki_nodes |>
  mutate(name = stringr::str_to_title(name))

# make json for 3d visualisation ----
json_nodes <- jsonify::to_json(wiki_nodes)
json_edges <- jsonify::to_json(wiki_edges)

json_wiki <- paste0('{"nodes": ', json_nodes, ', "links": ', json_edges, "}")

json_wiki |>
  write("outputs/wiki.json")

json_wiki |>
  write("../security-vision-3d/data/wiki.json")