2023-10-20 11:34:07 +00:00
#' DMS to Decimal
#'
#' Convert geocoordinates from DMS (degrees, minutes, seconds) format to
#' decimal degrees.
#'
#' @param input A character vector of DMS coordinates in the form parsed by
#'   [sp::char2dms()] with the degree, minute and second markers configured
#'   in the function body. `NA` entries are allowed.
#'
#' @return A numeric vector the same length as `input`. Entries that are `NA`
#'   in `input` are `NA_real_` in the result.
#' @export
#'
#' @examples
#' dms_to_decimal(NA)
dms_to_decimal <- function(input) {
  # Start from an all-missing result so the return type is always numeric,
  # including for empty input and input that is entirely NA.
  decimal <- rep(NA_real_, length(input))
  has_value <- !is.na(input)

  # Only hand the non-missing entries to the parser; a scalar `if (is.na())`
  # test would fail on empty input or on vectors longer than one.
  if (any(has_value)) {
    decimal[has_value] <- as.numeric(
      sp::char2dms(input[has_value], chd = " d", chm = " m", chs = " s")
    )
  }

  decimal
}
#' Convert Semantic Wiki style coordinates to decimal degrees
#'
#' Upper-cases the coordinate column, replaces the degree, minute and second
#' symbols with the letter markers that `dms_to_decimal()` parses, splits the
#' value on the comma separator into two parts, and converts the first part
#' to `latitude` and the second part to `longitude`.
#'
#' @param dataset A data frame that contains the coordinate column.
#' @param geocoordinates_variable Unquoted name of the column in `dataset`
#'   holding the coordinate pair as text.
#'
#' @return `dataset` with the coordinate column removed and two columns,
#'   `latitude` and `longitude`, added. The result is ungrouped.
#' @export
get_decimal_coordinates <- function(dataset, geocoordinates_variable) {
  dataset |>
    dplyr::mutate(
      {{ geocoordinates_variable }} := toupper({{ geocoordinates_variable }}),
      {{ geocoordinates_variable }} := stringr::str_replace_all({{ geocoordinates_variable }}, ' °', " d"),
      {{ geocoordinates_variable }} := stringr::str_replace_all({{ geocoordinates_variable }}, " '", " m"),
      {{ geocoordinates_variable }} := stringr::str_replace_all({{ geocoordinates_variable }}, ' "', " s")
    ) |>
    dplyr::mutate(
      geo_split = stringr::str_split({{ geocoordinates_variable }}, " ,")
    ) |>
    # Row-wise so that `geo_split` is the character vector of parts for the
    # current row and each part is converted on its own.
    dplyr::rowwise() |>
    dplyr::mutate(
      latitude = dms_to_decimal(geo_split[1]),
      longitude = dms_to_decimal(geo_split[2])
    ) |>
    # Drop the row-wise grouping so callers get a plain data frame back and
    # later verbs are not silently evaluated one row at a time.
    dplyr::ungroup() |>
    dplyr::select(-geo_split, -{{ geocoordinates_variable }})
}
2023-05-08 16:19:03 +00:00
# Note to future developers
#
# We finished working on this project early due to difficulties with the project management,
# including us not being paid for our work for over a year.
# If you are asked to continue working on this project,
# we would appreciate you reaching out to us [here](https://radicaldata.org/contact)
# so that we can fill you in on certain aspects of the code and the project.
#
# With solidarity and love,
# Radical Data
2023-05-08 15:58:56 +00:00
library ( tidyverse )
library ( tidygraph )
2023-10-20 11:34:07 +00:00
# Read the five test result exports from data-raw/test, one object per file.
wiki   <- readr::read_csv(file = " data-raw/test/result.csv")
wiki_2 <- readr::read_csv(file = " data-raw/test/result2.csv")
wiki_3 <- readr::read_csv(file = " data-raw/test/result3.csv")
wiki_4 <- readr::read_csv(file = " data-raw/test/result4.csv")
wiki_5 <- readr::read_csv(file = " data-raw/test/result5.csv")
2023-05-08 15:58:56 +00:00
# wiki_3 <- readr::read_csv("data-raw/result_3.csv")
# Append the rows of the second export to the first.
wiki <- add_row(wiki, wiki_2)
2023-10-20 11:34:07 +00:00
# Append the remaining exports to `wiki` one after another, in order:
# wiki_3, then wiki_4, then wiki_5.
wiki <- Reduce(add_row, list(wiki_3, wiki_4, wiki_5), wiki)
2023-05-08 15:58:56 +00:00
# wiki <- readr::read_csv("https://www.securityvision.io/wiki/index.php/Special:Ask/format%3Dcsv/limit%3D999999/link%3Dall/headers%3Dshow/searchlabel%3DCSV/class%3Dsortable-20wikitable-20smwtable/order%3Dasc/sort%3D/offset%3D0/-5B-5BCategory:Deployments-7C-7CInstitution-7C-7CDataset-7C-7CPerson-7C-7CTechnology-5D-5D/-3FCategory/-3FClients/-3FManaged-20by/-3FUsed-20by/-3FFunded-20by/-3FProvided-20by/-3FSoftware-20Deployed/-3FDatasets-20Used/-3FRelated-20Institutions/-3FIs-20Department-20Of/-3FInvolved-20Entities/mainlabel%3D/prettyprint%3Dtrue/unescape%3Dtrue")

# Columns whose values name other entities; each one becomes an edge type.
entity_columns <- c(
  " name",
  " managed_by",
  " used_by",
  " software_deployed",
  " datasets_used",
  " related_institutions",
  " developed_by_institution",
  " developed_by_people",
  " institution_type"
)

# Tidy the column names (including the "developped" misspelling), normalise
# the text columns, then add a row id and an "unknown" flag.
wiki <- wiki |>
  janitor::clean_names() |>
  dplyr::rename(
    name = x1,
    developed_by_people = developped_by_people,
    developed_by_institution = developped_by_institutions
  ) |>
  dplyr::mutate(category = stringr::str_remove(category, " Category:")) |>
  dplyr::mutate(dplyr::across(where(is.character), tolower)) |>
  dplyr::mutate(
    across(
      all_of(entity_columns),
      \(column) stringr::str_replace(column, " deployment,", " deployment:")
    )
  ) |>
  rowid_to_column(var = " id") |>
  # str_detect() already yields TRUE / FALSE / NA, so no if_else() wrapper.
  mutate(unknown = stringr::str_detect(name, " unknown"))

# Node table: one row per wiki entry.
wiki_nodes <- select(
  wiki,
  id, name, category, institution_type, geolocation, city, unknown
)

# Edge table: one row per (entry, entity column, referenced name), resolved
# to node ids by joining the referenced name against the node names.
wiki_edges <- wiki |>
  tidyr::pivot_longer(
    cols = all_of(entity_columns),
    names_to = " edge_type",
    values_to = " name"
  ) |>
  select(id, name, edge_type, category) |>
  separate_rows(name, sep = " ,") |>
  left_join(select(wiki_nodes, id, name), by = " name") |>
  rename(from = id.x, to = id.y) |>
  select(from, to, edge_type) |>
  # Keep only resolved edges that are not self-loops, without duplicates.
  filter(!is.na(from), !is.na(to), from != to) |>
  distinct()
2023-10-20 11:34:07 +00:00
# Load the objects saved in the cities data file into the workspace; the
# `cities` table joined onto the nodes further down presumably comes from here.
load(' data/cities.rda')
2023-05-08 15:58:56 +00:00
# nodes values
# Use the singular label for the deployment category.
wiki_nodes <- wiki_nodes |>
  mutate(category = if_else(category == " deployments", " deployment", category))

# add geolocation
# ungroup() removes any row-wise grouping left on the result so the verbs
# below run on whole columns.
wiki_nodes <- wiki_nodes |>
  get_decimal_coordinates(geolocation) |>
  ungroup()

# if row has no geocoordinates but has city, add the geocoordinates for the city
# NOTE(review): node ids are row positions in the graph built below, so this
# join must not add rows; confirm `cities` has one row per city.
wiki_nodes <- wiki_nodes |>
  left_join(cities, by = " city") |>
  mutate(
    latitude = coalesce(latitude.x, latitude.y),
    longitude = coalesce(longitude.x, longitude.y)
  ) |>
  select(-latitude.x, -latitude.y, -longitude.x, -longitude.y)

# edges values
# Collapse both "developed by" columns into one edge type. The institution
# column is named in the singular (see `entity_columns`), so match that
# spelling; the plural form never occurs as an edge type.
wiki_edges <- wiki_edges |>
  dplyr::mutate(
    edge_type = case_when(
      edge_type %in% c(" developed_by_institution", " developed_by_people") ~ " developed_by",
      TRUE ~ edge_type
    )
  )

# bring together
wiki <- tidygraph::tbl_graph(wiki_nodes, wiki_edges, directed = FALSE)

# add importance measure
# nodes
# `size` doubles the normalised connectivity for deployment nodes. The
# category was rewritten to the singular label above, so compare against
# that label.
wiki <- wiki |>
  activate(nodes) |>
  mutate(
    connectivity = tidygraph::centrality_betweenness(),
    connectivity_normalised = 1 + (connectivity - min(connectivity)) / sd(connectivity),
    size = connectivity_normalised * if_else(category == " deployment", 2, 1)
  )

# add uncertainty between edges
# `edge_certainty` is drawn at random from a uniform distribution between
# 0.2 and 0.8; no seed is set in this part of the script, so the values can
# differ from run to run.
wiki <- wiki |>
  activate(edges) |>
  mutate(
    edge_importance = centrality_edge_betweenness(),
    edge_importance_normalised = 1 + (edge_importance - mean(edge_importance)) / sd(edge_importance),
    edge_certainty = runif(n = n(), min = 0.2, max = 0.8),
    edge_certainty_twenty = edge_certainty * 20
  )

# community detection
wiki <- wiki |>
  activate(nodes) |>
  mutate(community = as.factor(group_infomap()))

# add labels to edges on what they contain
# wiki |>
#   activate(edges) |>
#   mutate(contains_deployment = edge_is_from(121))

# Pull the enriched node and edge tables back out of the graph.
wiki_nodes <- wiki |>
  activate(nodes) |>
  as_tibble()

wiki_edges <- wiki |>
  activate(edges) |>
  as_tibble()

# add to package
usethis::use_data(wiki, overwrite = TRUE)

# see as tibble for checking
wiki_tibble <- wiki |>
  as_tibble()

# Title-case the names for display; this happens after the graph was saved,
# so it only affects the JSON export below.
wiki_nodes <- wiki_nodes |>
  mutate(name = stringr::str_to_title(name))

# make json for 3d visualisation
json_nodes <- jsonify::to_json(wiki_nodes)
json_edges <- jsonify::to_json(wiki_edges)

json_wiki <- paste0(' {"nodes": ', json_nodes, ' , "links": ', json_edges, " }")

# Write the same document to the local outputs folder and to the sibling
# 3d visualisation project.
json_wiki |>
  write(" outputs/wiki.json")

json_wiki |>
  write(" ../security-vision-3d/data/wiki.json")