# task_1.R
################################################################################
library("dplyr") # move this to the top of your script
library("ggplot2") # move this to the top of your script
library("readr") # move this to the top of your script
library("sf") # move this to the top of your script
<- read_delim("datasets/posmo_2022-05-01T00 00 00+02 00-2023-04-18T23 59 59+02 00.csv") |>
posmo st_as_sf(coords = c("X", "Y"), crs = 2056, remove = FALSE) |>
<- posmo |>
posmo_filter filter(as.Date(datetime) == "2023-03-23")
<- function(later, now) {
distance_by_element as.numeric(
st_distance(later, now, by_element = TRUE)
)
}
<- posmo_filter |>
posmo_filter mutate(
stepMean = rowMeans(
cbind(
distance_by_element(lag(geometry, 3), geometry),
distance_by_element(lag(geometry, 2), geometry),
distance_by_element(lag(geometry, 1), geometry),
distance_by_element(geometry, lead(geometry, 1)),
distance_by_element(geometry, lead(geometry, 2)),
distance_by_element(geometry, lead(geometry, 3))
)
)
)
# Note:
# We present here a slightly different approach as presented in the input:
# - cbind() creates a matrix with the same number of rows as the original dataframe
# - It has 6 columns, one for each Euclidean distance calculation
# - rowMeans() returns a single vector with the same number of rows as the original dataframe
# task_2.R
################################################################################
summary(posmo_filter$stepMean)
ggplot(posmo_filter, aes(stepMean)) +
geom_histogram(binwidth = 1) +
geom_vline(xintercept = mean(posmo_filter$stepMean, na.rm = TRUE))
<- posmo_filter |>
posmo_filter mutate(
static = stepMean < mean(posmo_filter$stepMean, na.rm = TRUE)
)
# task_3.R
################################################################################
|>
posmo_filter ggplot() +
geom_path(aes(X, Y), alpha = 0.5) +
geom_point(aes(X, Y, colour = static)) +
theme_minimal() +
coord_equal()
# task_4.R
################################################################################
<- posmo_filter |>
posmo_filter mutate(
segment_ID = rle_id(static)
)
<- posmo_filter |>
posmo_filter_moves filter(!static)
<- ggplot(posmo_filter_moves, aes(X, Y, color = segment_ID)) +
p1 geom_point() +
geom_path() +
coord_equal() +
theme(legend.position = "none") +
labs(subtitle = "All segments (uncleaned)")
<- posmo_filter_moves |>
p2 group_by(segment_ID) |>
mutate(duration = as.integer(difftime(max(datetime), min(datetime), "mins"))) |>
filter(duration > 5) |>
ggplot(aes(X, Y, color = segment_ID)) +
geom_point() +
geom_path() +
coord_equal() +
theme(legend.position = "none") +
labs(subtitle = "Long segments (removed segements <5 minutes)")
# task_5.R
################################################################################
<- read_delim("datasets/pedestrian.csv", ",")
pedestrians
ggplot(pedestrians, aes(E, N)) +
geom_point(data = dplyr::select(pedestrians, -TrajID), alpha = 0.1) +
geom_point(aes(color = as.factor(TrajID)), size = 2) +
geom_path(aes(color = as.factor(TrajID))) +
facet_wrap(~TrajID, labeller = label_both) +
coord_equal() +
theme_minimal() +
labs(title = "Visual comparison of the 6 trajectories", subtitle = "Each subplot highlights a trajectory") +
theme(legend.position = "none", axis.text = element_blank())
# task_6.R
################################################################################
library("SimilarityMeasures") # move this to the top of your script
# for the similarity measure functions
# all functions compare two trajectories (traj1 and traj2). Each trajectory
# must be an numeric matrix of n dimensions. Since our dataset is spatiotemporal
# we need to turn our Datetime column from POSIXct to integer:
<- pedestrians |>
pedestrians mutate(Datetime_int = as.integer(DatetimeUTC))
# Next, we make an object for each trajectory only containing the
# coordinates in the three-dimensional space and turn it into a matrix
<- pedestrians |>
traj1 filter(TrajID == 1) |>
::select(E, N, Datetime_int) |>
dplyras.matrix()
# But instead of repeating these lines 6 times, we turn them into a function.
# (this is still more repetition than necessary, use the purr::map if you know
# how!)
<- function(df, traj) {
df_to_traj |>
df filter(TrajID == traj) |>
::select(E, N, Datetime_int) |>
dplyras.matrix()
}
<- df_to_traj(pedestrians, 2)
traj2 <- df_to_traj(pedestrians, 3)
traj3 <- df_to_traj(pedestrians, 4)
traj4 <- df_to_traj(pedestrians, 5)
traj5 <- df_to_traj(pedestrians, 6)
traj6
# Then we can start comparing trajectories with each other
<- DTW(traj1, traj2)
dtw_1_2 <- DTW(traj1, traj3)
dtw_1_3
# ... and so on. Since this also leads to much code repetition, we will
# demostrate a diffferent approach:
# Instead of creating 6 objects, we can also create a single list containing 6
# elements by using "split" and "purrr::map"
library(purrr)
<- map(1:6, function(x) {
pedestrians_list df_to_traj(pedestrians, x)
})
<- map_dfr(2:6, function(x) {
comparison_df tibble(
trajID = x,
DTW = DTW(pedestrians_list[[1]], pedestrians_list[[x]]),
EditDist = EditDist(pedestrians_list[[1]], pedestrians_list[[x]]),
Frechet = Frechet(pedestrians_list[[1]], pedestrians_list[[x]]),
LCSS = LCSS(pedestrians_list[[1]], pedestrians_list[[x]], 5, 4, 4)
)
})
library(tidyr) # for pivot_longer
|>
comparison_df pivot_longer(-trajID) |>
ggplot(aes(trajID, value, fill = as.factor(trajID))) +
geom_bar(stat = "identity") +
facet_wrap(~name, scales = "free") +
theme(legend.position = "none") +
labs(x = "Comparison trajectory", y = "Value", title = "Computed similarities using different measures \nbetween trajectory 1 to all other trajectories ")
Solutions
Tip
Hover over the code and copy the content by clicking on the clipboard icon on the top right. You can now paste this into an R-Script.
The code examples for task 1 - 4 was done using a posmo dataset. If you used another dataset (Strava, Google Timeline, etc.) your code can look a bit different.