# Load the movie data set (ggplot2movies) and the plotting library (ggplot2).
library(ggplot2movies)
library(ggplot2)

# The `movies` data frame is supplied by the ggplot2movies package.
# Keep only the rows that have no missing value in any column.
movies <- na.omit(movies)

# Express budget in millions for easier interpretation.
# This is a free-standing vector, row-aligned with `movies`; the frequency
# polygon below picks it up from the calling environment.
budget_millions <- movies$budget / 1000000

# Genre classification ----
# Each movie carries one 0/1 indicator column per genre. Refer to those
# columns by name rather than by position so that the classification keeps
# working if the column order of the data frame ever changes.
genre_cols <- c(
  "Action", "Animation", "Comedy", "Drama",
  "Documentary", "Romance", "Short"
)

# Number of genre flags set for each movie.
count <- rowSums(movies[, genre_cols])

# "Mixed" if more than one flag is set, "None" if no flag is set, and the
# name of the single flagged genre otherwise.
genre <- rep(NA, nrow(movies))
genre[count > 1] <- "Mixed"
genre[count < 1] <- "None"
for (genre_name in genre_cols) {
  genre[count == 1 & movies[[genre_name]] == 1] <- genre_name
}
movies$genre <- as.factor(genre)

# Plots ----
# Horizontal bar chart of the number of movies in each genre.
ggplot(movies) +
  geom_bar(aes(x = genre), fill = "#004C99") +
  labs(title = "Count of Genre", x = "Genre", y = "Count") +
  coord_flip()

# Distribution of movie lengths: one bar per distinct length value, with the
# view zoomed to 0-300 (coord_cartesian zooms without discarding any rows).
ggplot(movies, aes(x = length)) +
  geom_bar() +
  coord_cartesian(xlim = c(0, 300))

# Histogram of movie lengths, one panel per genre.
ggplot(data = movies, aes(x = length)) +
  geom_histogram(bins = 50) +
  facet_wrap(~genre) +
  ggtitle("Histogram of Length by Genre") +
  xlab("Length") +
  ylab("Count")

# Frequency polygon of movie budgets (in millions).
ggplot(movies, aes(x = budget_millions)) +
  geom_freqpoly(bins = 50)

# Box plot of the rating distribution within each genre.
ggplot(aes(x = genre, y = rating), data = movies) +
  geom_boxplot() +
  ggtitle("Distribution of Ratings for Different Genres")
# Load plotly for interactive visualizations.
library(plotly)

# Interactive plot of the mean rating per year, one line per genre.
# `fun` replaces the `fun.y` argument, which is deprecated in ggplot2 >= 3.3.0.
ggplotly(
  ggplot(movies, aes(x = year, y = rating, colour = genre, group = genre)) +
    stat_summary(fun = "mean", geom = "smooth")
)

# Decade bucketing ----
# Bucket each movie by release year. Intervals are closed on the left
# (e.g. 1960 <= year < 1970 is "60s").
# NOTE: the two outer buckets are open-ended: "50s" holds every year before
# 1960 and "90s" holds every year from 1990 onwards.
decade_labels <- c("50s", "60s", "70s", "80s", "90s")
decade <- as.character(
  cut(
    movies$year,
    breaks = c(-Inf, 1960, 1970, 1980, 1990, Inf),
    labels = decade_labels,
    right = FALSE
  )
)
# Explicit levels keep the decades in chronological order on the x axis.
movies$decade <- factor(decade, levels = decade_labels)

# Mean rating per decade, one line per genre.
ggplot(movies, aes(x = decade, y = rating, colour = genre, group = genre)) +
  stat_summary(fun = "mean", geom = "smooth")

# Scatter plot of rating against length with a linear fit, one panel per
# genre; the view is zoomed to lengths 0-300 without discarding any rows.
ggplot(data = movies, aes(x = length, y = rating, col = genre)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm") +
  facet_wrap(~genre, ncol = 3) +
  coord_cartesian(xlim = c(0, 300)) +
  labs(title = "Length and Rating", x = "Length", y = "Rating")