Solutions

Tip

Hover over the code and copy the content by clicking on the clipboard icon on the top right. You can now paste this into an R-Script.

The code examples for task 1 - 4 was done using a posmo dataset. If you used another dataset (Strava, Google Timeline, etc.) your code can look a bit different.

# task_1.R
################################################################################



library("dplyr")   # move this to the top of your script
library("ggplot2") # move this to the top of your script
library("readr")   # move this to the top of your script
library("sf")      # move this to the top of your script

posmo <- read_delim("datasets/posmo_2022-05-01T00 00 00+02 00-2023-04-18T23 59 59+02 00.csv") |>
    st_as_sf(coords = c("X", "Y"), crs = 2056, remove = FALSE) |>

posmo_filter <- posmo |>
    filter(as.Date(datetime) == "2023-03-23")

distance_by_element <- function(later, now) {
  as.numeric(
    st_distance(later, now, by_element = TRUE)
  )
}

posmo_filter <- posmo_filter |>
  mutate(
    stepMean = rowMeans(
      cbind(
        distance_by_element(lag(geometry, 3), geometry),
        distance_by_element(lag(geometry, 2), geometry),
        distance_by_element(lag(geometry, 1), geometry),
        distance_by_element(geometry, lead(geometry, 1)),
        distance_by_element(geometry, lead(geometry, 2)),
        distance_by_element(geometry, lead(geometry, 3))
      )
    )
  )

# Note:
# We present here a slightly different approach as presented in the input:
# - cbind() creates a matrix with the same number of rows as the original dataframe
# - It has 6 columns, one for each Euclidean distance calculation
# - rowMeans() returns a single vector with the same number of rows as the original dataframe




# task_2.R
################################################################################


summary(posmo_filter$stepMean)

ggplot(posmo_filter, aes(stepMean)) +
  geom_histogram(binwidth = 1) +
  geom_vline(xintercept = mean(posmo_filter$stepMean, na.rm = TRUE))

posmo_filter <- posmo_filter |>
  mutate(
    static = stepMean < mean(posmo_filter$stepMean, na.rm = TRUE)
  )




# task_3.R
################################################################################


posmo_filter |>
  ggplot() +
  geom_path(aes(X, Y), alpha = 0.5) +
  geom_point(aes(X, Y, colour = static)) +
  theme_minimal() +
  coord_equal()




# task_4.R
################################################################################


posmo_filter <- posmo_filter |>
  mutate(
    segment_ID = rle_id(static)
  )

posmo_filter_moves <- posmo_filter |>
  filter(!static)

p1 <- ggplot(posmo_filter_moves, aes(X, Y, color = segment_ID)) +
  geom_point() +
  geom_path() +
  coord_equal() +
  theme(legend.position = "none") +
  labs(subtitle = "All segments (uncleaned)")

p2 <- posmo_filter_moves |>
  group_by(segment_ID) |>
  mutate(duration = as.integer(difftime(max(datetime), min(datetime), "mins"))) |>
  filter(duration > 5) |>
  ggplot(aes(X, Y, color = segment_ID)) +
  geom_point() +
  geom_path() +
  coord_equal() +
  theme(legend.position = "none") +
  labs(subtitle = "Long segments (removed segements <5 minutes)")




# task_5.R
################################################################################


pedestrians <- read_delim("datasets/pedestrian.csv", ",")

ggplot(pedestrians, aes(E, N)) +
  geom_point(data = dplyr::select(pedestrians, -TrajID), alpha = 0.1) +
  geom_point(aes(color = as.factor(TrajID)), size = 2) +
  geom_path(aes(color = as.factor(TrajID))) +
  facet_wrap(~TrajID, labeller = label_both) +
  coord_equal() +
  theme_minimal() +
  labs(title = "Visual comparison of the 6 trajectories", subtitle = "Each subplot highlights a trajectory") +
  theme(legend.position = "none", axis.text = element_blank())




# task_6.R
################################################################################


library("SimilarityMeasures")  # move this to the top of your script
# for the similarity measure functions

# all functions compare two trajectories (traj1 and traj2). Each trajectory
# must be an numeric matrix of n dimensions. Since our dataset is spatiotemporal
# we need to turn our Datetime column from POSIXct to integer:

pedestrians <- pedestrians |>
  mutate(Datetime_int = as.integer(DatetimeUTC))

# Next, we make an object for each trajectory only containing the
# coordinates in the three-dimensional space and turn it into a matrix

traj1 <- pedestrians |>
  filter(TrajID == 1) |>
  dplyr::select(E, N, Datetime_int) |>
  as.matrix()

# But instead of repeating these lines 6 times, we turn them into a function.
# (this is still more repetition than necessary, use the purr::map if you know
# how!)

df_to_traj <- function(df, traj) {
  df |>
    filter(TrajID == traj) |>
    dplyr::select(E, N, Datetime_int) |>
    as.matrix()
}

traj2 <- df_to_traj(pedestrians, 2)
traj3 <- df_to_traj(pedestrians, 3)
traj4 <- df_to_traj(pedestrians, 4)
traj5 <- df_to_traj(pedestrians, 5)
traj6 <- df_to_traj(pedestrians, 6)

# Then we can start comparing trajectories with each other

dtw_1_2 <- DTW(traj1, traj2)
dtw_1_3 <- DTW(traj1, traj3)

# ... and so on. Since this also leads to much code repetition, we will
# demostrate a diffferent approach:

# Instead of creating 6 objects, we can also create a single list containing 6
# elements by using "split" and "purrr::map"

library(purrr)

pedestrians_list <- map(1:6, function(x) {
  df_to_traj(pedestrians, x)
})

comparison_df <- map_dfr(2:6, function(x) {
  tibble(
    trajID = x,
    DTW = DTW(pedestrians_list[[1]], pedestrians_list[[x]]),
    EditDist = EditDist(pedestrians_list[[1]], pedestrians_list[[x]]),
    Frechet = Frechet(pedestrians_list[[1]], pedestrians_list[[x]]),
    LCSS = LCSS(pedestrians_list[[1]], pedestrians_list[[x]], 5, 4, 4)
  )
})

library(tidyr) # for pivot_longer

comparison_df |>
  pivot_longer(-trajID) |>
  ggplot(aes(trajID, value, fill = as.factor(trajID))) +
  geom_bar(stat = "identity") +
  facet_wrap(~name, scales = "free") +
  theme(legend.position = "none") +
  labs(x = "Comparison trajectory", y = "Value", title = "Computed similarities using different measures \nbetween trajectory 1 to all other trajectories ")