# Load the ggplot2movies and ggplot2 libraries for data visualization.
library(ggplot2movies)
library(ggplot2)

# Data preparation ----
# `movies` is not defined in this script; it is presumably the dataset that
# ships with ggplot2movies (loaded above). Keep only fully observed rows.
movies <- stats::na.omit(movies)

# Rescale the budget to millions so the plotted values are easier to read.
budget_millions <- movies[["budget"]] / 1e6

# Derive a single `genre` label per movie from the genre indicator columns.
# A movie flagged in more than one genre becomes "Mixed", a movie flagged in
# none becomes "None", and a movie flagged in exactly one genre takes that
# genre's name.
# The indicator columns are selected by name rather than by position (18:24)
# so the result does not depend on the column order of `movies`.
# NOTE(review): assumes columns 18:24 were exactly these seven indicators.
genre_cols <- c(
  "Action", "Animation", "Comedy", "Drama",
  "Documentary", "Romance", "Short"
)
count <- rowSums(movies[, genre_cols])
genre <- rep(NA_character_, nrow(movies))
genre[count > 1] <- "Mixed"
genre[count < 1] <- "None"
for (genre_name in genre_cols) {
  # Only rows with exactly one flag set can receive a specific genre name.
  genre[count == 1 & movies[[genre_name]] == 1] <- genre_name
}
movies$genre <- as.factor(genre)

# Horizontal bar chart of the number of movies in each derived genre.
ggplot(movies, aes(x = genre)) +
  geom_bar(fill = "#004C99") +
  coord_flip() +
  labs(title = "Count of Genre", x = "Genre", y = "Count")

# Bar chart of how many movies have each distinct length (geom_bar draws one
# bar per value), with the visible x range zoomed to 0-300.
ggplot(data = movies) +
  geom_bar(mapping = aes(x = length)) +
  coord_cartesian(xlim = c(0, 300))

# Histogram of movie length (50 bins), drawn in one panel per genre.
ggplot(movies, aes(x = length)) +
  geom_histogram(bins = 50) +
  facet_wrap(~genre) +
  labs(title = "Histogram of Length by Genre", x = "Length", y = "Count")

# Frequency polygon of movie budgets, expressed in millions, using 50 bins.
# The budget is mapped from the `budget` column of `movies` itself rather than
# from a separate free-standing vector, so the plotted values always stay
# aligned with the rows of the data frame being plotted.
ggplot(movies, aes(x = budget / 1e6)) +
  geom_freqpoly(bins = 50) +
  labs(x = "Budget (millions)")

# Boxplot comparing the distribution of ratings across genres.
ggplot(movies, aes(x = genre, y = rating)) +
  geom_boxplot() +
  labs(title = "Distribution of Ratings for Different Genres")

# Load the plotly library for interactive visualizations.
library(plotly)

# Interactive (plotly) line plot of the mean rating per year, with one
# coloured line per genre.
# `fun` replaces the `fun.y` argument, which ggplot2 deprecated in 3.3.0.
ggplotly(
  ggplot(movies, aes(x = year, y = rating, colour = genre, group = genre)) +
    stat_summary(fun = mean, geom = "smooth")
)

# Bucket each movie's release year into a decade label.
# The outer buckets are open-ended: every year before 1960 is labelled "50s"
# and every year from 1990 onwards is labelled "90s".
decade_starts <- c(1960, 1970, 1980, 1990)
decade_labels <- c("50s", "60s", "70s", "80s", "90s")
# findInterval() yields 0 for years below 1960, 1 for [1960, 1970), and so on
# up to 4 for 1990 and later, hence the + 1 when indexing the labels.
decade <- decade_labels[findInterval(movies$year, decade_starts) + 1L]
movies$decade <- as.factor(decade)

# Line plot of the mean rating per decade, with one coloured line per genre.
# `fun` replaces the `fun.y` argument, which ggplot2 deprecated in 3.3.0.
ggplot(movies, aes(x = decade, y = rating, colour = genre, group = genre)) +
  stat_summary(fun = mean, geom = "smooth")

# Scatter plot of rating against length with a linear fit, coloured by genre
# and split into one panel per genre (three columns); x is zoomed to 0-300.
ggplot(movies, aes(x = length, y = rating, colour = genre)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm") +
  coord_cartesian(xlim = c(0, 300)) +
  facet_wrap(~genre, ncol = 3) +
  labs(title = "Length and Rating", x = "Length", y = "Rating")