# task_1.R
################################################################################
library("dplyr") # move this to the top of your script
library("ggplot2") # move this to the top of your script
library("readr") # move this to the top of your script
library("sf") # move this to the top of your script
posmo <- read_delim("datasets/posmo_2022-05-01T00 00 00+02 00-2023-04-18T23 59 59+02 00.csv") |>
st_as_sf(coords = c("X", "Y"), crs = 2056, remove = FALSE) |>
posmo_filter <- posmo |>
filter(as.Date(datetime) == "2023-03-23")
distance_by_element <- function(later, now) {
as.numeric(
st_distance(later, now, by_element = TRUE)
)
}
posmo_filter <- posmo_filter |>
mutate(
stepMean = rowMeans(
cbind(
distance_by_element(lag(geometry, 3), geometry),
distance_by_element(lag(geometry, 2), geometry),
distance_by_element(lag(geometry, 1), geometry),
distance_by_element(geometry, lead(geometry, 1)),
distance_by_element(geometry, lead(geometry, 2)),
distance_by_element(geometry, lead(geometry, 3))
)
)
)
# Note:
# We present here a slightly different approach as presented in the input:
# - cbind() creates a matrix with the same number of rows as the original dataframe
# - It has 6 columns, one for each Euclidean distance calculation
# - rowMeans() returns a single vector with the same number of rows as the original dataframe
# task_2.R
################################################################################
summary(posmo_filter$stepMean)
ggplot(posmo_filter, aes(stepMean)) +
geom_histogram(binwidth = 1) +
geom_vline(xintercept = mean(posmo_filter$stepMean, na.rm = TRUE))
posmo_filter <- posmo_filter |>
mutate(
static = stepMean < mean(posmo_filter$stepMean, na.rm = TRUE)
)
# task_3.R
################################################################################
posmo_filter |>
ggplot() +
geom_path(aes(X, Y), alpha = 0.5) +
geom_point(aes(X, Y, colour = static)) +
theme_minimal() +
coord_equal()
# task_4.R
################################################################################
posmo_filter <- posmo_filter |>
mutate(
segment_ID = rle_id(static)
)
posmo_filter_moves <- posmo_filter |>
filter(!static)
p1 <- ggplot(posmo_filter_moves, aes(X, Y, color = segment_ID)) +
geom_point() +
geom_path() +
coord_equal() +
theme(legend.position = "none") +
labs(subtitle = "All segments (uncleaned)")
p2 <- posmo_filter_moves |>
group_by(segment_ID) |>
mutate(duration = as.integer(difftime(max(datetime), min(datetime), "mins"))) |>
filter(duration > 5) |>
ggplot(aes(X, Y, color = segment_ID)) +
geom_point() +
geom_path() +
coord_equal() +
theme(legend.position = "none") +
labs(subtitle = "Long segments (removed segements <5 minutes)")
# task_5.R
################################################################################
pedestrians <- read_delim("datasets/pedestrian.csv", ",")
ggplot(pedestrians, aes(E, N)) +
geom_point(data = dplyr::select(pedestrians, -TrajID), alpha = 0.1) +
geom_point(aes(color = as.factor(TrajID)), size = 2) +
geom_path(aes(color = as.factor(TrajID))) +
facet_wrap(~TrajID, labeller = label_both) +
coord_equal() +
theme_minimal() +
labs(title = "Visual comparison of the 6 trajectories", subtitle = "Each subplot highlights a trajectory") +
theme(legend.position = "none", axis.text = element_blank())
# task_6.R
################################################################################
library("SimilarityMeasures") # move this to the top of your script
# for the similarity measure functions
# all functions compare two trajectories (traj1 and traj2). Each trajectory
# must be an numeric matrix of n dimensions. Since our dataset is spatiotemporal
# we need to turn our Datetime column from POSIXct to integer:
pedestrians <- pedestrians |>
mutate(Datetime_int = as.integer(DatetimeUTC))
# Next, we make an object for each trajectory only containing the
# coordinates in the three-dimensional space and turn it into a matrix
traj1 <- pedestrians |>
filter(TrajID == 1) |>
dplyr::select(E, N, Datetime_int) |>
as.matrix()
# But instead of repeating these lines 6 times, we turn them into a function.
# (this is still more repetition than necessary, use the purr::map if you know
# how!)
df_to_traj <- function(df, traj) {
df |>
filter(TrajID == traj) |>
dplyr::select(E, N, Datetime_int) |>
as.matrix()
}
traj2 <- df_to_traj(pedestrians, 2)
traj3 <- df_to_traj(pedestrians, 3)
traj4 <- df_to_traj(pedestrians, 4)
traj5 <- df_to_traj(pedestrians, 5)
traj6 <- df_to_traj(pedestrians, 6)
# Then we can start comparing trajectories with each other
dtw_1_2 <- DTW(traj1, traj2)
dtw_1_3 <- DTW(traj1, traj3)
# ... and so on. Since this also leads to much code repetition, we will
# demostrate a diffferent approach:
# Instead of creating 6 objects, we can also create a single list containing 6
# elements by using "split" and "purrr::map"
library(purrr)
pedestrians_list <- map(1:6, function(x) {
df_to_traj(pedestrians, x)
})
comparison_df <- map_dfr(2:6, function(x) {
tibble(
trajID = x,
DTW = DTW(pedestrians_list[[1]], pedestrians_list[[x]]),
EditDist = EditDist(pedestrians_list[[1]], pedestrians_list[[x]]),
Frechet = Frechet(pedestrians_list[[1]], pedestrians_list[[x]]),
LCSS = LCSS(pedestrians_list[[1]], pedestrians_list[[x]], 5, 4, 4)
)
})
library(tidyr) # for pivot_longer
comparison_df |>
pivot_longer(-trajID) |>
ggplot(aes(trajID, value, fill = as.factor(trajID))) +
geom_bar(stat = "identity") +
facet_wrap(~name, scales = "free") +
theme(legend.position = "none") +
labs(x = "Comparison trajectory", y = "Value", title = "Computed similarities using different measures \nbetween trajectory 1 to all other trajectories ")Solutions
Tip
Hover over the code and copy the content by clicking on the clipboard icon on the top right. You can now paste this into an R-Script.
The code examples for task 1 - 4 was done using a posmo dataset. If you used another dataset (Strava, Google Timeline, etc.) your code can look a bit different.