# Load necessary libraries
library(dplyr, warn.conflicts = FALSE) # For data manipulation
library(rvest)        # For web scraping
library(janitor)      # For cleaning data
library(tidygeocoder) # For geocoding addresses
library(sf)           # For spatial data manipulation
library(httr)         # For HTTP requests
library(jsonlite)     # For JSON manipulation
library(leaflet)      # For interactive maps
library(htmlwidgets)  # For saving the HTML file

# Get the directory path of the currently executing script file.
# `ofile` is only present when the script is run through source(); when it is
# missing, fall back to the `--file=` argument passed by Rscript, and finally
# to the current working directory, instead of failing on dirname(NULL).
script_path <- parent.frame(2)$ofile
if (is.null(script_path)) {
  file_arg <- grep("^--file=", commandArgs(trailingOnly = FALSE), value = TRUE)
  if (length(file_arg) > 0) {
    script_path <- sub("^--file=", "", file_arg[1])
  }
}
this_dir <- if (is.null(script_path)) getwd() else dirname(script_path)

# Set the working directory to the directory path obtained above, so that the
# output file written at the end of the script lands next to the script
setwd(this_dir)

# Define the URL of the webpage containing information about spa towns in the
# Czech Republic
url <- "https://www.mzcr.cz/seznam-lazenskych-mist-v-cr/"

# Read the HTML content of the webpage
html <- read_html(url)

# Extract the first table from the HTML content
table <- html |>
  html_element("table") |>
  html_table()

# Use the first row of the table as column names
table <- table |>
  row_to_names(1)

# Every later step keys on the `Lokalita` column; fail early with a clear
# message if the page layout changed and the column is no longer there
if (!"Lokalita" %in% names(table)) {
  stop(
    "Expected a 'Lokalita' column in the table scraped from ", url,
    call. = FALSE
  )
}

# Clean up the data: rename certain locations before geocoding
table <- table |>
  mutate(Lokalita = case_when(
    Lokalita == "Vráž" ~ "Vráž (okres Písek)",
    TRUE ~ Lokalita
  ))

# Geocode the locations in the table to obtain latitude and longitude
# coordinates (stored in the `latitude` and `longitude` columns)
lat_longs <- table |>
  tidygeocoder::geocode(
    Lokalita,
    method = "arcgis",
    lat = latitude,
    long = longitude
  )

# Further clean up the data: adjust certain location names after geocoding.
# These names are used below as Czech Wikipedia page titles.
lat_longs <- lat_longs |>
  mutate(Lokalita = case_when(
    Lokalita == "Bílina" ~ "Bílina (město)",
    # Space instead of underscore, consistent with the other "(okres ...)"
    # entries; the name is also shown verbatim as the marker label
    Lokalita == "Bludov" ~ "Bludov (okres Šumperk)",
    Lokalita == "Hodonín – Josefov" ~ "Josefov (okres Hodonín)",
    Lokalita == "Karviná – Darkov" ~ "Lázně Darkov",
    Lokalita == "Kostelec u Zlína" ~ "Kostelec (Zlín)",
    Lokalita == "Lednice" ~ "Lednice (okres Břeclav)",
    Lokalita == "Mšené Lázně" ~ "Mšené-lázně",
    Lokalita == "Teplice v Čechách" ~ "Teplice",
    Lokalita == "Toušeň" ~ "Lázně Toušeň",
    TRUE ~ Lokalita
  ))

# Create Wikipedia and API links for each location
# Percent-encode page titles so that spaces, diacritics and parentheses yield
# valid URLs. NA titles are passed through untouched because
# utils::URLencode() fails on missing values.
encode_wiki_title <- function(title) {
  encoded <- rep(NA_character_, length(title))
  present <- !is.na(title)
  encoded[present] <- utils::URLencode(
    enc2utf8(as.character(title[present])),
    reserved = TRUE
  )
  encoded
}

encoded_titles <- encode_wiki_title(lat_longs$Lokalita)
lat_longs$wiki <- paste0("https://cs.wikipedia.org/wiki/", encoded_titles)
lat_longs$source <- paste0(
  "https://cs.wikipedia.org/w/api.php?action=query&titles=",
  encoded_titles,
  "&prop=pageimages&format=json&pithumbsize=300"
)

#' Get the thumbnail link for a place from the Czech Wikipedia API
#'
#' Sends a `pageimages` query for `place_name` and extracts the thumbnail
#' source of the first page in the response.
#'
#' @param place_name Page title to look up (a single string).
#' @return The thumbnail URL as a length-one character vector, or
#'   `NA_character_` when the request fails, the response cannot be parsed,
#'   or the page has no thumbnail.
get_thumbnail_link <- function(place_name) {
  api_url <- "https://cs.wikipedia.org/w/api.php"
  params <- list(
    action = "query",
    titles = place_name,
    prop = "pageimages",
    format = "json",
    pithumbsize = 300
  )

  # A failed lookup for one place must not abort the whole run: turn request,
  # HTTP-status and JSON-parsing errors into a warning and a missing value
  pages <- tryCatch(
    {
      response <- GET(api_url, query = params)
      stop_for_status(response)
      body <- content(response, as = "text", encoding = "UTF-8")
      fromJSON(body)$query$pages
    },
    error = function(e) {
      warning(
        "Thumbnail lookup failed for '", place_name, "': ",
        conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )

  if (length(pages) == 0) {
    return(NA_character_)
  }

  # `pages` is keyed by page id; only one title was requested, so take the
  # first entry
  thumbnail_link <- pages[[1]]$thumbnail$source
  if (is.null(thumbnail_link)) {
    return(NA_character_)
  }
  as.character(thumbnail_link)[1]
}

# Add a column with thumbnail links for each location; vapply() guarantees a
# plain character vector regardless of how many lookups succeed
lat_longs$thumbnail_link <- vapply(
  lat_longs$Lokalita,
  get_thumbnail_link,
  character(1),
  USE.NAMES = FALSE
)

# Define content for popups in leaflet map: optional thumbnail, the place name
# and a link to its Wikipedia page. The image is omitted when no thumbnail was
# found so that the popup does not show a broken image.
content <- ~paste0(
  '<div style="text-align: center;">',
  ifelse(
    is.na(thumbnail_link),
    "",
    paste0('<img src="', thumbnail_link, '" alt="" width="150"><br>')
  ),
  "<b>", Lokalita, "</b><br>",
  '<a href="', wiki, '" target="_blank" rel="noopener">', "Wikipedie", "</a>",
  "</div>"
)

# Create an interactive leaflet map with spa locations
m <- leaflet(data = lat_longs) |>
  addTiles(group = "OSM") |>
  addProviderTiles(providers$OpenTopoMap, group = "Topo") |>
  addLayersControl(baseGroups = c("OSM", "Topo")) |>
  addAwesomeMarkers(
    ~longitude,
    ~latitude,
    popup = content,
    # Refer to the column of the map's data rather than the global object
    label = ~as.character(Lokalita)
  )

m

saveWidget(m, file = "lazne.html")