# initialisation code copied on file GEO872-session-01-solution.rmd
# packages:
## Default repository
local({r <- getOption("repos")
r["CRAN"] <- "http://cran.r-project.org"
options(repos=r)
})
check_pkg <- function(x)
{
if (!require(x,character.only = TRUE, quietly = TRUE))
{
install.packages(x,dep=TRUE)
if(!require(x,character.only = TRUE, quietly = TRUE)) stop("Package not found")
}
}
# Call check_pkg() to install/load the required packages.
check_pkg("here")
check_pkg("sf")
check_pkg("readr")
check_pkg("ggplot2")
check_pkg("dplyr")
check_pkg("lubridate") #for timezones
check_pkg("raster")
check_pkg("leaflet")
check_pkg("maptools")
check_pkg("concaveman") # for concave hulls
# Plot
check_pkg("tidyverse")
check_pkg("hrbrthemes")
check_pkg("viridis")
check_pkg("tidyr")
check_pkg("forcats")
check_pkg("RColorBrewer")
check_pkg("extrafont")
check_pkg("ggcorrplot") #for correlation matrix
# Statistics
check_pkg("rstatix")
check_pkg("caret")
# Machine learning
check_pkg("caTools")
check_pkg("class")
check_pkg("forecast")
check_pkg("MLmetrics")
check_pkg("randomForest")
check_pkg("rpart")
#https://cran.r-project.org/web/packages/rmdwc/rmdwc.pdf
check_pkg("rmdwc")
#rmdcountAddin()
# DATA FOLDER
#On Martine laptop:
#data_folder <- "D:/Documents/Master/UZH/cours/GEO880_Computational Movement Analysis/Project/FS23_880Project_G3/data"
#On Yelu laptop:
data_folder <- "D:/GEO880/Project"
Our project aims at defining, describing and identifying movement patterns for different travel modes. Movement data has been collected over a period of approximately two months and a various range of travel modes have been recorded: train, tram, bus, car, walking, cycling, running, kick-scooter, ski, ski-lift and cable-car, boat and plane. A labeled dataset with travel modes will be extracted from the collected raw data by cleaning, filtering and segmentation.
The movement pattern for each travel mode will first be characterized by the following metrics: speed (acceleration, deceleration), sinuosity (angle, direction), and environmental factors related to the studied travel mode: proximity to railway tracks, tram lines, road network, walking path, slope, etc.
In a second step, the suitability of these characteristics for identifying travel modes will be studied. Limitation of data and methods will also be discussed. For example are the difference of the aforementioned characteristics significant between two travel modes, and does our recorded data contain enough information.
Based on the answer, a model will then be attempted to build. We will apply it to collected data to detect different travel modes. The results of the identification will finally be evaluated with the labeled dataset.
Subjects and exercises studied in class were used to build our project. Additional paper (See references) were also read.
First the trajectories data will be prepared to be fed for machine learning: data cleaning, labelizing, values extracted.
To train a machine learning to classify the segments in the correct
categories, we first need to clean and prepare our data.
Firstly, the points with be manually labelized with the correct
transport modes. Secondly, the points will be segmentized based on time
gaps (error in recording or simply break in movement (night)) Then,
trajectory derivatives such as speed, sinuosity will be calculated.
We are working with two trajectory points datasets: one from posmo,
and one from a GPS tracker. Some inital cleanings and adaptations are
required to create an homogen trajectory dataset. In both datasets, the
columns were prepared in sort to have the basic attributes:
- source (posmo or GPS)
- datetime
- tmode_manual - E, N, H - geometry
Data cleaning and basic processing
The trajectory points downloaded from posmo are recorded in the
coordinate system WGS84. The points were then converted in the Swiss
system (MN95), and the coordinates extracted. Unfortunately, posmo
platform does not give the possibility to get the recorded altitude of
the points. To have an approximation of the height, the altitudes were
extracted thanks to the Digital Height Model DHM25 provided by
swisstopo. The DHM25 gives altitude every 25m. We estimated the
precision will be enough for our project. DHM25 is available as a
raster. To save calculation time in the project, this was first
preprocessed on QGIS to be converted in the new Swiss projection system
(MN95, previously MN03).
An issue with this method, is that this extraction gives the altitude
on the ground, and not of the user. This is of poor impact for
terrestrial movements, such as walks, trains, etc. as the differences is
not more than 5 meters. As we look at ranges (above ~ 1200m) or relative
derivatives (slope, speed), this is not too much influence.
However, this is more important for plane: the altitude of the user
cannot possibly be taken this way. Therefore, no altitude value were
assigned to plane segments.
# import and convert to sf object
posmo <- read_delim(here(data_folder, "posmo_2023-07-02.csv"), ",",show_col_types = FALSE) %>% st_as_sf(coords = c("lon_x", "lat_y"), crs=4326, remove = FALSE) |> st_transform(2056)
# Remove/rename columns
posmo <- posmo %>%
rename("tmode_posmo" = "transport_mode")
to_remove <- c("place_name", "lon_x", "lat_y", "user_id")
posmo <- posmo[ , !(names(posmo) %in% to_remove)]
# Add source
posmo$source <- "posmo"
# Add column for travel mode
posmo$tmode_manual <- NA
# add East and North columns
coords <- posmo |> st_coordinates()
posmo <- posmo |>
mutate(E = coords[,1], N = coords[,2])
# Filter points outside of Switzerland
ch_ll <- c(2460666, 1069416)
ch_ur <- c(2849000, 1300750)
posmo <- posmo %>%
filter((E > ch_ll[1] & E < ch_ur[1] & N > ch_ll[2] & N < ch_ur[2]))
# Update the timezone
posmo <- posmo %>%
mutate(
datetime = datetime %>% with_tz(tzone = "Europe/Zurich")
)
# Add heights
## Extract values from rasters:
dhm25 <- raster::raster(here(data_folder, "dhm25_raster", "dhm25_2056.tif"))
rasValue <- raster::extract(dhm25, posmo)
posmo$H <- rasValue
Labelizing data
#select boats segments
condition_boats <- posmo$datetime > as.POSIXct("2023-05-14 13:55:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-14 14:21:00", tz = "Europe/Zurich")
#select planes segments
condition_planes <- posmo$datetime > as.POSIXct("2023-06-17 11:12:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-17 12:22:00", tz = "Europe/Zurich")
#select cable car segments
condition_cable_cars <-
posmo$datetime > as.POSIXct("2023-04-12 09:12:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 09:18:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 09:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 12:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 12:27:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 09:18:10", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:24:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 13:11:45", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 13:20:00", tz = "Europe/Zurich")
# select ski lifts segments
condition_ski_lifts <-
posmo$datetime > as.POSIXct("2023-04-12 09:23:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 09:30:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 09:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 09:41:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 09:51:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 9:59:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 10:15:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 10:24:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 10:51:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 10:57:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 11:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 11:08:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 11:21:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 11:29:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 11:46:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 11:51:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 13:49:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 13:54:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 13:57:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 14:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 09:14:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:19:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 09:22:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:30:45", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 09:36:45", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:44:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 09:58:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 10:08:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 10:19:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 10:28:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 10:40:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 10:51:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 09:28:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:33:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 09:36:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:43:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 09:48:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:56:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 10:06:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 10:14:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 10:23:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 10:28:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 10:30:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 10:38:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 11:06:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 11:14:30", tz = "Europe/Zurich")
# select t-bar segments
condition_t_bars <-
posmo$datetime > as.POSIXct("2023-04-12 10:28:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 10:33:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 14:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 14:18:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 10:54:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 11:02:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 11:19:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 11:26:45", tz = "Europe/Zurich")
# select ski slopes segments
condition_skis <-
posmo$datetime > as.POSIXct("2023-04-12 09:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 09:33:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 09:41:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 9:51:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 09:59:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 10:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 10:24:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 10:28:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 10:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 10:51:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 10:57:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 11:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 11:08:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 11:21:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 11:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 11:46:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 11:51:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 13:49:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 13:54:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 13:57:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 14:05:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 14:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 14:18:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 14:48:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 09:12:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:14:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 09:19:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:22:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 09:30:45", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:36:45", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 09:44:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:58:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 10:08:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 10:17:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 10:28:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 10:40:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 10:51:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 10:54:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 11:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 11:25:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 15:40:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 15:59:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 09:24:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:28:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 09:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:36:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 09:43:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:48:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 09:56:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 10:06:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 10:14:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 10:23:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 10:28:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 10:30:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 10:38:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 11:06:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 11:14:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 11:19:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 11:26:45", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 11:42:30", tz = "Europe/Zurich")
#select kick scooters segments
condition_scooters <-
posmo$datetime > as.POSIXct("2023-05-03 19:53:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-03 19:56:15", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-03 21:47:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-03 21:56:15", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-04 07:36:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-04 07:42:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-24 22:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-24 22:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-24 19:48:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-24 20:00:00", tz = "Europe/Zurich")
#select bikes segments
condition_bikes <-
posmo$datetime > as.POSIXct("2023-05-17 07:43:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-17 07:57:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-17 18:01:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-17 18:12:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-23 07:52:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-23 08:07:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-23 17:11:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-23 18:14:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-24 07:59:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-24 08:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-24 17:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-24 17:50:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-25 07:43:20", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-25 07:58:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-03 08:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-03 08:51:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-03 17:39:15", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-03 17:51:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-02 08:31:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-02 08:36:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-02 11:49:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-02 12:04:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-02 17:44:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-02 18:04:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-15 15:57:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-15 16:32:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-01 07:37:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-01 07:53:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-01 18:38:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-01 18:52:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-31 07:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 08:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-31 17:32:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 17:50:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-31 18:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 18:04:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-31 18:09:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 18:35:00", tz = "Europe/Zurich")
#select train segments
condition_trains <-
posmo$datetime > as.POSIXct("2023-05-17 19:40:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-17 22:15:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 07:35:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 07:57:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-28 15:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-28 15:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-28 15:31:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-28 15:34:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-29 22:38:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-29 22:55:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-02 08:53:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-02 08:59:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-05 15:44:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-05 15:59:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-05 22:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-05 22:44:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-05 23:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-05 23:42:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 07:05:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 07:40:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-28 17:49:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-28 18:28:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 06:51:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 06:54:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 07:12:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 07:31:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 12:48:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 13:13:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 18:24:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 19:52:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-22 14:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-22 14:58:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-26 18:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-26 19:18:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-09 8:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-09 10:55:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-28 12:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-28 12:50:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-09 19:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 21:30:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-09 12:23:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 12:48:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-11 02:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-11 02:45:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-12 07:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-12 07:53:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-14 17:25:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-14 17:42:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-06 09:05:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 11:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-15 08:06:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-15 10:02:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-16 18:48:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-16 19:11:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-18 12:45:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-18 16:33:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-02 07:04:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-02 07:43:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-02 15:27:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-02 17:53:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-03-31 16:23:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-03-31 16:47:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-03 21:32:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-04 00:22:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-06 17:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-06 17:57:00", tz = "Europe/Zurich")
#select trams segments
condition_trams <-
posmo$datetime > as.POSIXct("2023-05-22 07:57:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 08:03:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 10:15:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 10:20:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 12:03:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 12:08:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-26 16:18:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-26 16:32:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-04 16:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-04 16:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-08 18:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-08 18:58:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-08 22:04:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-08 22:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-09 11:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-09 11:25:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-13 15:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-13 15:17:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-14 13:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-14 13:20:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-14 14:41:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-14 14:52:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-01 18:52:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-01 18:58:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-18 08:39:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-18 08:47:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-19 08:41:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-19 08:46:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-22 15:21:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-22 15:40:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-16 20:01:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-16 20:06:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-09 17:17:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 17:22:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-09 13:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 13:54:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-06 11:09:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 11:23:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-05 18:49:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-05 19:03:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-05 23:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-05 23:40:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-25 15:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-25 15:57:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-15 10:07:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-15 10:19:45", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-15 18:36:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-15 18:56:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-04 23:05:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-04 23:16:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-04 16:51:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-04 17:01:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-04 19:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-04 19:41:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-05 08:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-05 08:52:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-06 07:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-06 07:50:00", tz = "Europe/Zurich")
#select bus segments
condition_bus <-
posmo$datetime > as.POSIXct("2023-05-17 19:29:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-17 19:35:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-19 18:55:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-19 19:01:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-19 23:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-19 23:04:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 12:24:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 12:33:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 19:11:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 19:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 19:15:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 19:17:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 21:53:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 21:57:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-26 14:52:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-26 15:07:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-26 19:22:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-26 19:29:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-26 19:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-26 19:39:20", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-28 14:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-28 14:55:40", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-29 23:13:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-29 23:17:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-10 18:28:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-10 18:33:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-02 18:27:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-02 18:44:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-02 23:19:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-02 23:30:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-03 19:30:15", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-03 19:53:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-03 22:28:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-03 22:32:15", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-04 07:42:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-04 07:53:15", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-04 16:15:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-04 16:30:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-05 06:55:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-05 06:53:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-05 07:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-05 07:50:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-08 18:39:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-08 18:47:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-08 22:15:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-08 22:23:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-10 16:07:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-10 16:12:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-10 18:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-10 18:41:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-10 22:03:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-10 22:11:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-11 13:24:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-11 13:29:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-11 14:36:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-11 14:53:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 06:56:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 07:02:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 07:46:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 07:52:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 18:05:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 18:07:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 19:37:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 19:40:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 21:23:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 21:27:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-13 22:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-13 22:52:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-14 12:47:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-14 13:02:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-14 17:35:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-14 17:38:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-20 07:36:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-20 07:50:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-20 14:48:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-20 15:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-20 18:18:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-20 18:35:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-24 10:42:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-24 10:49:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-24 12:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-24 12:53:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-26 16:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-26 17:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-28 13:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-28 13:44:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-28 15:38:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-28 15:42:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-28 12:06:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-28 12:13:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-17 10:03:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-17 10:07:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-17 16:04:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-17 16:21:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-17 18:26:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-17 18:44:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-18 08:32:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-18 08:37:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-18 17:17:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-18 17:40:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-18 19:35:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-18 19:51:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-19 08:32:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-19 08:39:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-19 18:34:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-19 18:43:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-19 19:34:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-19 19:49:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-19 22:01:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-19 22:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 06:44:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 06:48:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 07:35:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 07:40:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 15:19:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 15:36:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 16:51:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 16:57:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 19:53:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 19:59:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-22 14:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-22 14:25:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-22 18:16:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-22 18:45:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-16 20:09:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-16 20:14:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-16 11:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-16 11:31:40", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-09 17:25:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 17:31:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-24 19:32:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-24 19:49:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-24 22:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-24 22:22:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-07 07:53:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-07 08:12:00", tz = "Europe/Zurich") | posmo$datetime > as.POSIXct("2023-06-07 08:35:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-07 08:45:00", tz = "Europe/Zurich") | posmo$datetime > as.POSIXct("2023-06-07 19:23:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-07 19:37:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-06 20:39:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 20:44:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-06 21:03:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 21:17:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-06 08:15:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 08:25:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-06 08:28:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 08:35:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-05 18:37:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-05 18:46:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-25 11:36:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-25 11:57:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-25 16:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-25 16:39:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-17 16:07:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-17 16:19:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-18 12:12:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-18 12:16:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-18 12:23:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-18 12:29:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-04 23:17:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-04 23:23:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-02 06:59:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-02 07:04:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-02 07:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-02 07:50:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-01 22:17:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-01 22:30:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-31 21:57:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 22:14:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-03-31 16:13:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-03-31 16:20:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-04 19:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-04 19:59:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-06 07:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-06 07:33:00", tz = "Europe/Zurich")
#select walking segments
condition_walks <-
posmo$datetime > as.POSIXct("2023-05-18 11:54:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-18 11:57:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-18 22:06:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-18 22:08:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-19 18:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-19 18:53:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-19 19:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-19 20:01:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-19 23:04:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-19 23:08:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-21 14:05:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-21 18:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 09:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 10:02:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 10:11:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 10:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 11:56:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 11:59:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 12:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 12:12:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 12:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 12:36:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-25 20:48:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-25 20:59:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-26 15:07:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-26 15:20:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-26 19:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-26 19:32:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-26 19:39:20", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-26 19:43:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-28 14:55:40", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-28 14:59:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-28 15:34:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-28 21:12:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-10 18:24:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-10 18:27:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-10 18:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-10 18:59:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-10 19:12:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-10 20:06:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-05 07:50:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-05 08:50:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-05 15:19:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-05 15:44:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-05 18:20:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-05 21:59:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-06 19:01:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-06 19:40:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-07 03:06:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-07 03:14:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-08 18:35:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-08 18:39:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-08 18:47:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-08 18:50:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-08 18:58:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-08 19:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-08 22:23:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-08 22:35:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-09 11:25:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-09 11:33:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-11 07:52:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-11 07:57:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 07:52:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 07:56:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 16:40:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 16:44:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 16:44:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 16:50:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 17:31:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 18:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-14 13:20:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-14 13:40:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-14 14:36:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-14 14:40:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-14 14:52:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-14 16:31:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-20 07:50:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-20 08:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-20 14:39:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-20 14:46:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-23 12:22:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-23 14:35:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-24 10:36:20", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-24 10:39:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-24 11:15:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-24 11:19:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-24 12:34:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-24 12:37:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-24 12:53:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-24 12:58:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-24 13:41:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-24 14:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-28 12:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-28 12:06:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-01 08:02:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-01 08:14:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-18 08:47:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-18 08:55:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-18 17:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-18 17:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-18 17:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-18 17:48:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-18 17:55:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-18 18:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-19 08:47:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-19 08:55:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-19 18:27:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-19 18:33:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-19 19:49:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-19 19:55:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-19 21:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-19 21:59:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 07:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 07:48:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 12:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 12:47:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 13:13:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 13:30:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 13:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 14:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 19:59:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 20:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 22:48:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 22:58:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-22 18:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-22 18:16:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-22 18:46:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-22 19:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-16 11:31:40", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-16 11:40:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-16 15:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-16 15:37:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-16 15:44:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-16 15:56:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-13 13:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-13 15:01:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-13 16:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-13 22:01:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-13 22:04:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-13 22:29:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-20 11:48:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-20 22:35:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 10:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 11:42:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-29 16:18:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-29 16:46:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-09 11:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 12:20:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-11 02:46:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-11 03:04:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-12 07:53:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-12 08:07:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-06 08:11:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 08:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-05 23:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-05 23:57:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 09:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 09:12:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 18:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 18:50:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-25 11:57:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-25 12:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-25 15:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-25 15:50:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 08:43:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 08:46:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 18:05:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 18:15:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 13:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 13:25:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-15 10:19:45", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-15 10:24:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-15 18:56:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-15 19:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-16 18:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-16 18:45:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-16 21:41:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-16 22:23:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-17 15:59:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-17 16:04:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-17 16:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-17 16:28:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-17 22:31:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-17 22:45:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-18 12:16:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-18 12:19:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-18 12:29:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-18 12:33:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-04 17:59:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-04 18:12:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-04 22:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-04 23:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-02 15:04:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-02 15:23:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-31 17:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 18:02:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-31 18:04:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 18:09:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-31 19:47:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 20:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-31 21:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 21:55:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-10 18:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-10 18:45:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-10 22:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-10 22:18:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-11 11:51:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-11 11:56:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-11 15:28:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-11 15:36:00", tz = "Europe/Zurich")
#select car segments
condition_cars <-
posmo$datetime > as.POSIXct("2023-05-17 22:23:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-17 22:38:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-19 23:08:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-19 23:41:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-20 11:18:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-20 11:48:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-20 22:35:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-20 22:51:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-21 13:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-21 13:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-21 13:15:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-21 14:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-21 18:01:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-21 18:20:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-21 20:01:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-21 20:37:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 07:25:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 07:27:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-27 09:20:20", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-27 09:42:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-27 11:52:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-27 12:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-28 04:19:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-28 05:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-28 21:51:40", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-28 22:59:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-29 15:28:59", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-29 16:18:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-29 17:28:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-29 18:18:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-23 20:09:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-23 20:42:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-28 18:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-28 18:48:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-30 21:56:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-30 22:28:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-01 07:24:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-01 07:28:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-17 07:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-17 07:57:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-22 03:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-22 04:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-11 15:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-11 17:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-24 14:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-24 17:30:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-10 13:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-10 17:33:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-09 21:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 22:30:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-14 07:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-14 08:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-14 17:44:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-14 18:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-13 18:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-13 19:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 14:25:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 21:41:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-15 07:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-15 08:04:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-16 23:06:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-16 23:40:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-17 14:34:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-17 14:42:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-04 17:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-04 17:59:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-03 21:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-03 22:18:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-02 18:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-02 18:43:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-06 21:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-06 22:37:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-09 09:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-09 12:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-10 08:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-10 09:39:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-10 18:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-10 21:10:00", tz = "Europe/Zurich")
#final labelizing
posmo <- posmo %>%
mutate(tmode_manual = case_when(condition_walks ~ 'walk',
condition_boats ~ 'boat',
condition_planes ~ 'plane',
condition_trains ~ 'train',
condition_bikes ~ 'bike',
condition_scooters ~ 'kick-scooter',
condition_cars ~ 'car',
condition_bus ~ 'bus',
condition_trams ~ 'tram',
condition_t_bars ~ 't_bar',
condition_cable_cars ~ 'cable_car',
condition_ski_lifts ~ 'ski_lift',
condition_skis ~ 'ski',
is.na(posmo$tmode_manual) == TRUE ~ 'unclassified'
))
# Remove heights for plane (bc completely false):
posmo$H[posmo$tmode_manual == "plane"] <- 0.0
# select columns
posmo <- dplyr::select(posmo, c("source", "datetime", "tmode_manual", "E", "N", "H", "geometry"))
In the posmo dataset, have been recorded the following transport modes, ordered from the highest number of points (n) to the lowest:
#count of of points per means of transports:
posmo_tmode_count <- posmo %>% group_by(tmode_manual) %>% count() %>% arrange(desc(n))
knitr::kable(posmo_tmode_count %>% st_drop_geometry(),
caption = "Number of points labelized per mode of transport - posmo")
tmode_manual | n |
---|---|
unclassified | 12232 |
car | 9422 |
walk | 8575 |
bus | 5080 |
train | 5075 |
bike | 2306 |
tram | 1964 |
ski | 1795 |
ski_lift | 1148 |
plane | 465 |
cable_car | 225 |
t_bar | 183 |
kick-scooter | 149 |
boat | 85 |
Data cleaning and basic processing
The trajectory points recorded on GPS are recorded in the coordinate
system WGS84. The points were then converted in the Swiss system (MN95),
and the coordinates extracted. And the time recorded was converted to
the time zone ‘Europe/Zurich’. No further preparation were required.
## Load the data
gps_data_raw <- read_delim(here(data_folder, "yelu_dataset_all.csv"), ";")
## Use right coordinate system and preserve original E/N columns
gps_data_raw <- st_as_sf(gps_data_raw, coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE)
gps_data <- gps_data_raw %>% st_transform(crs = 2056)
gps_data$Datetime <- as.POSIXct(paste(as.Date(gps_data$Date, format = '%d.%m.%Y'), gps_data$Time), format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
gps_data <- gps_data %>% mutate(Datetime = Datetime %>% with_tz(tzone = "Europe/Zurich"))
gps_data$source <- "gps"
# Extract E and N values
coords <- gps_data |> st_coordinates()
gps_data <- gps_data |>
mutate(Latitude = coords[,2], Longitude = coords[,1])
# Rename columns
colnames(gps_data)[11] <- "datetime"
colnames(gps_data)[3] <- "N"
colnames(gps_data)[4] <- "E"
colnames(gps_data)[8] <- "H"
# Add column for travel mode
gps_data$tmode_manual <- NA
gps_data <- dplyr::select(gps_data, c("source", "datetime", "N", "E", "H", "geometry", "tmode_manual"))
Labelizing data
# Conditions for different travel modes
condition_bus <-
gps_data$datetime >= as.POSIXct("2023-04-21 16:18:35") & gps_data$datetime <= as.POSIXct("2023-04-21 16:23:40") |
gps_data$datetime >= as.POSIXct("2023-04-30 18:04:25") & gps_data$datetime <= as.POSIXct("2023-04-30 18:20:15") |
gps_data$datetime >= as.POSIXct("2023-04-30 19:46:20") & gps_data$datetime <= as.POSIXct("2023-04-30 19:59:25") |
gps_data$datetime >= as.POSIXct("2023-05-10 19:31:25") & gps_data$datetime <= as.POSIXct("2023-05-10 19:34:55") |
gps_data$datetime >= as.POSIXct("2023-05-10 22:43:00") & gps_data$datetime <= as.POSIXct("2023-05-10 22:56:05") |
gps_data$datetime >= as.POSIXct("2023-05-12 07:46:00") & gps_data$datetime <= as.POSIXct("2023-05-12 07:51:20") |
gps_data$datetime >= as.POSIXct("2023-05-26 07:45:20") & gps_data$datetime <= as.POSIXct("2023-05-26 07:50:25") |
gps_data$datetime >= as.POSIXct("2023-06-05 14:59:35") & gps_data$datetime <= as.POSIXct("2023-06-05 15:09:00") |
gps_data$datetime >= as.POSIXct("2023-06-05 15:17:55") & gps_data$datetime <= as.POSIXct("2023-06-05 15:27:40")
condition_tram <-
gps_data$datetime >= as.POSIXct("2023-04-21 17:18:55") & gps_data$datetime <= as.POSIXct("2023-04-21 17:26:05") |
gps_data$datetime >= as.POSIXct("2023-04-25 15:07:50") & gps_data$datetime <= as.POSIXct("2023-04-25 15:11:55") |
gps_data$datetime >= as.POSIXct("2023-04-25 17:40:50") & gps_data$datetime <= as.POSIXct("2023-04-25 17:47:20") |
gps_data$datetime >= as.POSIXct("2023-04-25 18:57:55") & gps_data$datetime <= as.POSIXct("2023-04-25 19:04:12") |
gps_data$datetime >= as.POSIXct("2023-04-25 19:50:45") & gps_data$datetime <= as.POSIXct("2023-04-25 19:54:25") |
gps_data$datetime >= as.POSIXct("2023-04-25 20:26:10") & gps_data$datetime <= as.POSIXct("2023-04-25 20:32:05") |
gps_data$datetime >= as.POSIXct("2023-04-29 18:06:45") & gps_data$datetime <= as.POSIXct("2023-04-29 18:12:20") |
gps_data$datetime >= as.POSIXct("2023-04-30 16:32:55") & gps_data$datetime <= as.POSIXct("2023-04-30 16:34:50") |
gps_data$datetime >= as.POSIXct("2023-04-30 16:45:05") & gps_data$datetime <= as.POSIXct("2023-04-30 16:56:30") |
gps_data$datetime >= as.POSIXct("2023-04-30 17:46:25") & gps_data$datetime <= as.POSIXct("2023-04-30 18:00:00") |
gps_data$datetime >= as.POSIXct("2023-04-30 20:10:35") & gps_data$datetime <= as.POSIXct("2023-04-30 20:12:05") |
gps_data$datetime >= as.POSIXct("2023-05-01 12:56:05") & gps_data$datetime <= as.POSIXct("2023-05-01 13:04:10") |
gps_data$datetime >= as.POSIXct("2023-05-01 13:16:50") & gps_data$datetime <= as.POSIXct("2023-05-01 13:25:05") |
gps_data$datetime >= as.POSIXct("2023-05-01 13:25:55") & gps_data$datetime <= as.POSIXct("2023-05-01 13:29:40") |
gps_data$datetime >= as.POSIXct("2023-05-01 19:28:40") & gps_data$datetime <= as.POSIXct("2023-05-01 19:30:35") |
gps_data$datetime >= as.POSIXct("2023-05-06 13:42:15") & gps_data$datetime <= as.POSIXct("2023-05-06 13:54:15") |
gps_data$datetime >= as.POSIXct("2023-05-06 13:58:00") & gps_data$datetime <= as.POSIXct("2023-05-06 14:04:35") |
gps_data$datetime >= as.POSIXct("2023-05-06 15:51:20") & gps_data$datetime <= as.POSIXct("2023-05-06 15:59:40") |
gps_data$datetime >= as.POSIXct("2023-05-06 16:07:35") & gps_data$datetime <= as.POSIXct("2023-05-06 16:18:05") |
gps_data$datetime >= as.POSIXct("2023-05-06 16:27:00") & gps_data$datetime <= as.POSIXct("2023-05-06 16:33:40") |
gps_data$datetime >= as.POSIXct("2023-05-06 17:13:15") & gps_data$datetime <= as.POSIXct("2023-05-06 17:20:05") |
gps_data$datetime >= as.POSIXct("2023-05-07 17:32:15") & gps_data$datetime <= as.POSIXct("2023-05-07 17:34:00") |
gps_data$datetime >= as.POSIXct("2023-05-07 18:09:50") & gps_data$datetime <= as.POSIXct("2023-05-07 18:26:55") |
gps_data$datetime >= as.POSIXct("2023-05-07 18:43:35") & gps_data$datetime <= as.POSIXct("2023-05-07 18:46:25") |
gps_data$datetime >= as.POSIXct("2023-05-07 19:41:25") & gps_data$datetime <= as.POSIXct("2023-05-07 19:44:50") |
gps_data$datetime >= as.POSIXct("2023-05-07 19:47:25") & gps_data$datetime <= as.POSIXct("2023-05-07 20:09:15") |
gps_data$datetime >= as.POSIXct("2023-05-08 19:25:25") & gps_data$datetime <= as.POSIXct("2023-05-08 19:28:35") |
gps_data$datetime >= as.POSIXct("2023-05-08 19:49:45") & gps_data$datetime <= as.POSIXct("2023-05-08 19:53:25") |
gps_data$datetime >= as.POSIXct("2023-05-10 17:16:45") & gps_data$datetime <= as.POSIXct("2023-05-10 17:28:45") |
gps_data$datetime >= as.POSIXct("2023-05-10 17:52:15") & gps_data$datetime <= as.POSIXct("2023-05-10 17:53:15") |
gps_data$datetime >= as.POSIXct("2023-05-12 18:00:50") & gps_data$datetime <= as.POSIXct("2023-05-12 18:08:15") |
gps_data$datetime >= as.POSIXct("2023-05-13 17:39:25") & gps_data$datetime <= as.POSIXct("2023-05-13 17:41:25") |
gps_data$datetime >= as.POSIXct("2023-05-16 17:48:05") & gps_data$datetime <= as.POSIXct("2023-05-16 17:50:00") |
gps_data$datetime >= as.POSIXct("2023-05-17 14:27:15") & gps_data$datetime <= as.POSIXct("2023-05-17 14:35:00") |
gps_data$datetime >= as.POSIXct("2023-05-17 14:52:15") & gps_data$datetime <= as.POSIXct("2023-05-17 15:00:25") |
gps_data$datetime >= as.POSIXct("2023-05-17 15:22:40") & gps_data$datetime <= as.POSIXct("2023-05-17 15:30:10") |
gps_data$datetime >= as.POSIXct("2023-05-18 14:45:50") & gps_data$datetime <= as.POSIXct("2023-05-18 14:56:25") |
gps_data$datetime >= as.POSIXct("2023-05-18 16:00:10") & gps_data$datetime <= as.POSIXct("2023-05-18 16:07:10") |
gps_data$datetime >= as.POSIXct("2023-05-20 16:43:35") & gps_data$datetime <= as.POSIXct("2023-05-20 16:51:35") |
gps_data$datetime >= as.POSIXct("2023-05-20 17:00:30") & gps_data$datetime <= as.POSIXct("2023-05-20 17:07:45") |
gps_data$datetime >= as.POSIXct("2023-05-20 20:30:40") & gps_data$datetime <= as.POSIXct("2023-05-20 20:45:40") |
gps_data$datetime >= as.POSIXct("2023-05-21 19:10:00") & gps_data$datetime <= as.POSIXct("2023-05-21 19:14:15") |
gps_data$datetime >= as.POSIXct("2023-05-21 19:16:50") & gps_data$datetime <= as.POSIXct("2023-05-21 19:37:25") |
gps_data$datetime >= as.POSIXct("2023-05-21 20:25:00") & gps_data$datetime <= as.POSIXct("2023-05-21 20:29:10") |
gps_data$datetime >= as.POSIXct("2023-05-21 20:37:45") & gps_data$datetime <= as.POSIXct("2023-05-21 20:47:35") |
gps_data$datetime >= as.POSIXct("2023-05-23 17:36:35") & gps_data$datetime <= as.POSIXct("2023-05-23 17:37:15") |
gps_data$datetime >= as.POSIXct("2023-05-23 17:43:35") & gps_data$datetime <= as.POSIXct("2023-05-23 17:57:50") |
gps_data$datetime >= as.POSIXct("2023-05-23 19:19:35") & gps_data$datetime <= as.POSIXct("2023-05-23 19:34:45") |
gps_data$datetime >= as.POSIXct("2023-05-24 13:19:10") & gps_data$datetime <= as.POSIXct("2023-05-24 13:24:35") |
gps_data$datetime >= as.POSIXct("2023-05-24 17:39:00") & gps_data$datetime <= as.POSIXct("2023-05-24 17:55:35") |
gps_data$datetime >= as.POSIXct("2023-05-25 14:46:30") & gps_data$datetime <= as.POSIXct("2023-05-25 14:55:30") |
gps_data$datetime >= as.POSIXct("2023-05-25 15:19:20") & gps_data$datetime <= as.POSIXct("2023-05-25 15:28:35") |
gps_data$datetime >= as.POSIXct("2023-05-26 13:17:05") & gps_data$datetime <= as.POSIXct("2023-05-26 13:31:20") |
gps_data$datetime >= as.POSIXct("2023-05-26 13:53:00") & gps_data$datetime <= as.POSIXct("2023-05-26 14:10:00") |
gps_data$datetime >= as.POSIXct("2023-05-26 14:16:00") & gps_data$datetime <= as.POSIXct("2023-05-26 14:18:05") |
gps_data$datetime >= as.POSIXct("2023-05-28 14:52:20") & gps_data$datetime <= as.POSIXct("2023-05-28 15:04:05") |
gps_data$datetime >= as.POSIXct("2023-05-28 15:22:40") & gps_data$datetime <= as.POSIXct("2023-05-28 15:38:50") |
gps_data$datetime >= as.POSIXct("2023-05-28 20:20:00") & gps_data$datetime <= as.POSIXct("2023-05-28 20:37:00") |
gps_data$datetime >= as.POSIXct("2023-05-28 20:45:05") & gps_data$datetime <= as.POSIXct("2023-05-28 20:56:10") |
gps_data$datetime >= as.POSIXct("2023-05-29 17:40:05") & gps_data$datetime <= as.POSIXct("2023-05-29 17:59:50") |
gps_data$datetime >= as.POSIXct("2023-05-29 18:19:55") & gps_data$datetime <= as.POSIXct("2023-05-29 18:27:35") |
gps_data$datetime >= as.POSIXct("2023-05-29 18:28:35") & gps_data$datetime <= as.POSIXct("2023-05-29 18:36:00") |
gps_data$datetime >= as.POSIXct("2023-05-29 18:56:35") & gps_data$datetime <= as.POSIXct("2023-05-29 19:09:30") |
gps_data$datetime >= as.POSIXct("2023-05-30 17:31:10") & gps_data$datetime <= as.POSIXct("2023-05-30 17:42:50") |
gps_data$datetime >= as.POSIXct("2023-05-30 19:04:55") & gps_data$datetime <= as.POSIXct("2023-05-30 19:48:50") |
gps_data$datetime >= as.POSIXct("2023-05-31 17:08:25") & gps_data$datetime <= as.POSIXct("2023-05-31 17:09:40") |
gps_data$datetime >= as.POSIXct("2023-05-31 17:34:25") & gps_data$datetime <= as.POSIXct("2023-05-31 17:42:25") |
gps_data$datetime >= as.POSIXct("2023-06-02 13:29:10") & gps_data$datetime <= as.POSIXct("2023-06-02 13:30:45") |
gps_data$datetime >= as.POSIXct("2023-06-02 13:36:25") & gps_data$datetime <= as.POSIXct("2023-06-02 13:50:55") |
gps_data$datetime >= as.POSIXct("2023-06-02 14:55:10") & gps_data$datetime <= as.POSIXct("2023-06-02 15:13:10") |
gps_data$datetime >= as.POSIXct("2023-06-04 16:58:15") & gps_data$datetime <= as.POSIXct("2023-06-04 17:00:00") |
gps_data$datetime >= as.POSIXct("2023-06-04 17:09:20") & gps_data$datetime <= as.POSIXct("2023-06-04 17:14:10") |
gps_data$datetime >= as.POSIXct("2023-06-04 18:18:35") & gps_data$datetime <= as.POSIXct("2023-06-04 18:22:25") |
gps_data$datetime >= as.POSIXct("2023-06-04 18:48:20") & gps_data$datetime <= as.POSIXct("2023-06-04 19:03:35") |
gps_data$datetime >= as.POSIXct("2023-06-05 15:37:15") & gps_data$datetime <= as.POSIXct("2023-06-05 15:40:15") |
gps_data$datetime >= as.POSIXct("2023-06-09 13:51:50") & gps_data$datetime <= as.POSIXct("2023-06-09 13:53:45") |
gps_data$datetime >= as.POSIXct("2023-06-10 18:35:10") & gps_data$datetime <= as.POSIXct("2023-06-10 18:48:30") |
gps_data$datetime >= as.POSIXct("2023-06-10 20:48:30") & gps_data$datetime <= as.POSIXct("2023-06-10 21:01:00") |
gps_data$datetime >= as.POSIXct("2023-06-12 19:07:00") & gps_data$datetime <= as.POSIXct("2023-06-12 19:14:45") |
gps_data$datetime >= as.POSIXct("2023-06-12 19:43:20") & gps_data$datetime <= as.POSIXct("2023-06-12 19:45:40")
condition_walk <-
gps_data$datetime >= as.POSIXct("2023-04-21 16:13:40") & gps_data$datetime <= as.POSIXct("2023-04-21 16:16:25") |
gps_data$datetime >= as.POSIXct("2023-04-25 15:13:05") & gps_data$datetime <= as.POSIXct("2023-04-25 15:18:00") |
gps_data$datetime >= as.POSIXct("2023-04-25 15:37:00") & gps_data$datetime <= as.POSIXct("2023-04-25 15:40:15") |
gps_data$datetime >= as.POSIXct("2023-04-25 19:05:05") & gps_data$datetime <= as.POSIXct("2023-04-25 19:08:25") |
gps_data$datetime >= as.POSIXct("2023-04-25 20:32:45") & gps_data$datetime <= as.POSIXct("2023-04-25 20:35:05") |
gps_data$datetime >= as.POSIXct("2023-04-29 21:41:45") & gps_data$datetime <= as.POSIXct("2023-04-29 22:19:40") |
gps_data$datetime >= as.POSIXct("2023-04-30 16:25:55") & gps_data$datetime <= as.POSIXct("2023-04-30 16:29:00") |
gps_data$datetime >= as.POSIXct("2023-04-30 16:57:00") & gps_data$datetime <= as.POSIXct("2023-04-30 17:45:00") |
gps_data$datetime >= as.POSIXct("2023-04-30 18:20:25") & gps_data$datetime <= as.POSIXct("2023-04-30 19:39:00") |
gps_data$datetime >= as.POSIXct("2023-04-30 20:13:20") & gps_data$datetime <= as.POSIXct("2023-04-30 20:15:40") |
gps_data$datetime >= as.POSIXct("2023-05-01 13:31:50") & gps_data$datetime <= as.POSIXct("2023-05-01 19:22:20") |
gps_data$datetime >= as.POSIXct("2023-05-01 19:31:55") & gps_data$datetime <= as.POSIXct("2023-05-01 19:36:05") |
gps_data$datetime >= as.POSIXct("2023-05-06 14:06:10") & gps_data$datetime <= as.POSIXct("2023-05-06 14:09:40") |
gps_data$datetime >= as.POSIXct("2023-05-06 16:18:45") & gps_data$datetime <= as.POSIXct("2023-05-06 16:21:30") |
gps_data$datetime >= as.POSIXct("2023-05-06 16:23:35") & gps_data$datetime <= as.POSIXct("2023-05-06 16:26:25") |
gps_data$datetime >= as.POSIXct("2023-05-06 16:33:50") & gps_data$datetime <= as.POSIXct("2023-05-06 16:34:40") |
gps_data$datetime >= as.POSIXct("2023-05-06 17:29:25") & gps_data$datetime <= as.POSIXct("2023-05-06 17:32:15") |
gps_data$datetime >= as.POSIXct("2023-05-07 17:34:55") & gps_data$datetime <= as.POSIXct("2023-05-07 18:05:05") |
gps_data$datetime >= as.POSIXct("2023-05-07 18:30:50") & gps_data$datetime <= as.POSIXct("2023-05-07 18:42:30") |
gps_data$datetime >= as.POSIXct("2023-05-07 20:13:00") & gps_data$datetime <= as.POSIXct("2023-05-07 20:15:35") |
gps_data$datetime >= as.POSIXct("2023-05-08 18:56:05") & gps_data$datetime <= as.POSIXct("2023-05-08 19:21:30") |
gps_data$datetime >= as.POSIXct("2023-05-08 19:53:55") & gps_data$datetime <= as.POSIXct("2023-05-08 20:14:55") |
gps_data$datetime >= as.POSIXct("2023-05-10 17:10:10") & gps_data$datetime <= as.POSIXct("2023-05-10 17:14:10") |
gps_data$datetime >= as.POSIXct("2023-05-10 17:28:50") & gps_data$datetime <= as.POSIXct("2023-05-10 17:52:00") |
gps_data$datetime >= as.POSIXct("2023-05-10 18:11:15") & gps_data$datetime <= as.POSIXct("2023-05-10 18:14:00") |
gps_data$datetime >= as.POSIXct("2023-05-10 19:36:45") & gps_data$datetime <= as.POSIXct("2023-05-10 19:42:45") |
gps_data$datetime >= as.POSIXct("2023-05-10 22:56:30") & gps_data$datetime <= as.POSIXct("2023-05-10 23:04:50") |
gps_data$datetime >= as.POSIXct("2023-05-12 07:52:05") & gps_data$datetime <= as.POSIXct("2023-05-12 07:54:35") |
gps_data$datetime >= as.POSIXct("2023-05-12 17:28:25") & gps_data$datetime <= as.POSIXct("2023-05-12 17:30:45") |
gps_data$datetime >= as.POSIXct("2023-05-12 18:25:50") & gps_data$datetime <= as.POSIXct("2023-05-12 18:28:20") |
gps_data$datetime >= as.POSIXct("2023-05-13 17:19:15") & gps_data$datetime <= as.POSIXct("2023-05-13 17:39:00") |
gps_data$datetime >= as.POSIXct("2023-05-13 17:41:40") & gps_data$datetime <= as.POSIXct("2023-05-13 17:45:25") |
gps_data$datetime >= as.POSIXct("2023-05-16 17:44:00") & gps_data$datetime <= as.POSIXct("2023-05-16 17:47:00") |
gps_data$datetime >= as.POSIXct("2023-05-16 17:51:20") & gps_data$datetime <= as.POSIXct("2023-05-16 17:54:05") |
gps_data$datetime >= as.POSIXct("2023-05-17 15:44:15") & gps_data$datetime <= as.POSIXct("2023-05-17 15:48:05") |
gps_data$datetime >= as.POSIXct("2023-05-18 14:21:00") & gps_data$datetime <= as.POSIXct("2023-05-18 14:40:35") |
gps_data$datetime >= as.POSIXct("2023-05-18 14:57:00") & gps_data$datetime <= as.POSIXct("2023-05-18 15:56:00") |
gps_data$datetime >= as.POSIXct("2023-05-18 16:11:45") & gps_data$datetime <= as.POSIXct("2023-05-18 16:14:30") |
gps_data$datetime >= as.POSIXct("2023-05-20 16:39:20") & gps_data$datetime <= as.POSIXct("2023-05-20 16:41:00") |
gps_data$datetime >= as.POSIXct("2023-05-20 17:09:00") & gps_data$datetime <= as.POSIXct("2023-05-20 20:30:00") |
gps_data$datetime >= as.POSIXct("2023-05-20 20:49:40") & gps_data$datetime <= as.POSIXct("2023-05-20 20:53:35") |
gps_data$datetime >= as.POSIXct("2023-05-21 19:37:35") & gps_data$datetime <= as.POSIXct("2023-05-21 20:24:05") |
gps_data$datetime >= as.POSIXct("2023-05-21 20:48:00") & gps_data$datetime <= as.POSIXct("2023-05-21 20:52:20") |
gps_data$datetime >= as.POSIXct("2023-05-23 17:34:05") & gps_data$datetime <= as.POSIXct("2023-05-23 17:36:25") |
gps_data$datetime >= as.POSIXct("2023-05-23 17:58:45") & gps_data$datetime <= as.POSIXct("2023-05-23 19:16:00") |
gps_data$datetime >= as.POSIXct("2023-05-23 19:39:10") & gps_data$datetime <= as.POSIXct("2023-05-23 19:42:10") |
gps_data$datetime >= as.POSIXct("2023-05-24 09:12:05") & gps_data$datetime <= as.POSIXct("2023-05-24 09:17:20") |
gps_data$datetime >= as.POSIXct("2023-05-24 14:50:15") & gps_data$datetime <= as.POSIXct("2023-05-24 14:52:20") |
gps_data$datetime >= as.POSIXct("2023-05-24 15:42:05") & gps_data$datetime <= as.POSIXct("2023-05-24 16:38:35") |
gps_data$datetime >= as.POSIXct("2023-05-25 14:35:55") & gps_data$datetime <= as.POSIXct("2023-05-25 14:40:25") |
gps_data$datetime >= as.POSIXct("2023-05-25 14:55:35") & gps_data$datetime <= as.POSIXct("2023-05-25 14:57:05") |
gps_data$datetime >= as.POSIXct("2023-05-25 15:14:50") & gps_data$datetime <= as.POSIXct("2023-05-25 15:19:15") |
gps_data$datetime >= as.POSIXct("2023-05-25 15:29:00") & gps_data$datetime <= as.POSIXct("2023-05-25 15:35:35") |
gps_data$datetime >= as.POSIXct("2023-05-25 20:06:20") & gps_data$datetime <= as.POSIXct("2023-05-25 20:52:00") |
gps_data$datetime >= as.POSIXct("2023-05-26 07:51:15") & gps_data$datetime <= as.POSIXct("2023-05-26 07:54:15") |
gps_data$datetime >= as.POSIXct("2023-05-26 08:47:15") & gps_data$datetime <= as.POSIXct("2023-05-26 12:17:30") |
gps_data$datetime >= as.POSIXct("2023-05-26 13:31:40") & gps_data$datetime <= as.POSIXct("2023-05-26 13:49:00") |
gps_data$datetime >= as.POSIXct("2023-05-26 14:27:10") & gps_data$datetime <= as.POSIXct("2023-05-26 17:20:10") |
gps_data$datetime >= as.POSIXct("2023-05-28 15:39:00") & gps_data$datetime <= as.POSIXct("2023-05-28 20:19:00") |
gps_data$datetime >= as.POSIXct("2023-05-28 20:57:05") & gps_data$datetime <= as.POSIXct("2023-05-28 21:00:00") |
gps_data$datetime >= as.POSIXct("2023-05-29 17:05:05") & gps_data$datetime <= as.POSIXct("2023-05-29 17:40:00") |
gps_data$datetime >= as.POSIXct("2023-05-29 18:13:30") & gps_data$datetime <= as.POSIXct("2023-05-29 18:18:45") |
gps_data$datetime >= as.POSIXct("2023-05-30 11:33:25") & gps_data$datetime <= as.POSIXct("2023-05-30 11:37:15") |
gps_data$datetime >= as.POSIXct("2023-05-31 17:44:25") & gps_data$datetime <= as.POSIXct("2023-05-31 17:48:00") |
gps_data$datetime >= as.POSIXct("2023-06-02 13:23:45") & gps_data$datetime <= as.POSIXct("2023-06-02 13:28:00") |
gps_data$datetime >= as.POSIXct("2023-06-02 13:55:35") & gps_data$datetime <= as.POSIXct("2023-06-02 14:19:00") |
gps_data$datetime >= as.POSIXct("2023-06-02 14:39:30") & gps_data$datetime <= as.POSIXct("2023-06-02 14:47:05") |
gps_data$datetime >= as.POSIXct("2023-06-02 15:16:45") & gps_data$datetime <= as.POSIXct("2023-06-02 15:20:25") |
gps_data$datetime >= as.POSIXct("2023-06-04 17:00:50") & gps_data$datetime <= as.POSIXct("2023-06-04 17:06:30") |
gps_data$datetime >= as.POSIXct("2023-06-04 17:17:20") & gps_data$datetime <= as.POSIXct("2023-06-04 18:17:00") |
gps_data$datetime >= as.POSIXct("2023-06-04 19:04:45") & gps_data$datetime <= as.POSIXct("2023-06-04 19:08:00") |
gps_data$datetime >= as.POSIXct("2023-06-05 12:51:35") & gps_data$datetime <= as.POSIXct("2023-06-05 12:56:40") |
gps_data$datetime >= as.POSIXct("2023-06-05 14:49:00") & gps_data$datetime <= as.POSIXct("2023-06-05 14:55:25") |
gps_data$datetime >= as.POSIXct("2023-06-05 15:28:00") & gps_data$datetime <= as.POSIXct("2023-06-05 15:36:00") |
gps_data$datetime >= as.POSIXct("2023-06-05 15:42:40") & gps_data$datetime <= as.POSIXct("2023-06-05 15:46:05") |
gps_data$datetime >= as.POSIXct("2023-06-06 20:00:05") & gps_data$datetime <= as.POSIXct("2023-06-06 20:34:25") |
gps_data$datetime >= as.POSIXct("2023-06-08 20:55:10") & gps_data$datetime <= as.POSIXct("2023-06-08 21:22:10") |
gps_data$datetime >= as.POSIXct("2023-06-09 13:54:10") & gps_data$datetime <= as.POSIXct("2023-06-09 17:14:20") |
gps_data$datetime >= as.POSIXct("2023-06-10 18:50:20") & gps_data$datetime <= as.POSIXct("2023-06-10 20:45:00") |
gps_data$datetime >= as.POSIXct("2023-06-10 21:03:15") & gps_data$datetime <= as.POSIXct("2023-06-10 21:05:40") |
gps_data$datetime >= as.POSIXct("2023-06-12 21:29:35") & gps_data$datetime <= as.POSIXct("2023-06-12 22:11:45") |
gps_data$datetime >= as.POSIXct("2023-06-15 21:43:25") & gps_data$datetime <= as.POSIXct("2023-06-15 21:45:00") |
gps_data$datetime >= as.POSIXct("2023-06-15 21:52:10") & gps_data$datetime <= as.POSIXct("2023-06-15 21:55:05") |
gps_data$datetime >= as.POSIXct("2023-06-15 21:58:05") & gps_data$datetime <= as.POSIXct("2023-06-15 21:59:55") |
gps_data$datetime >= as.POSIXct("2023-06-15 22:02:00") & gps_data$datetime <= as.POSIXct("2023-06-15 22:07:15") |
gps_data$datetime >= as.POSIXct("2023-06-15 22:14:45") & gps_data$datetime <= as.POSIXct("2023-06-15 22:19:00") |
gps_data$datetime >= as.POSIXct("2023-06-15 22:25:25") & gps_data$datetime <= as.POSIXct("2023-06-15 22:28:20") |
gps_data$datetime >= as.POSIXct("2023-06-16 10:17:45") & gps_data$datetime <= as.POSIXct("2023-06-16 10:21:25")
condition_boat <-
gps_data$datetime >= as.POSIXct("2023-06-02 14:20:00") & gps_data$datetime <= as.POSIXct("2023-06-02 14:37:20")
condition_run <-
gps_data$datetime >= as.POSIXct("2023-05-21 20:24:10") & gps_data$datetime <= as.POSIXct("2023-05-21 20:24:55") |
gps_data$datetime >= as.POSIXct("2023-06-08 20:59:00") & gps_data$datetime <= as.POSIXct("2023-06-08 21:17:55") |
gps_data$datetime >= as.POSIXct("2023-06-15 21:45:10") & gps_data$datetime <= as.POSIXct("2023-06-15 21:52:00") |
gps_data$datetime >= as.POSIXct("2023-06-15 21:55:10") & gps_data$datetime <= as.POSIXct("2023-06-15 21:57:25") |
gps_data$datetime >= as.POSIXct("2023-06-15 22:00:00") & gps_data$datetime <= as.POSIXct("2023-06-15 22:01:55") |
gps_data$datetime >= as.POSIXct("2023-06-15 22:07:40") & gps_data$datetime <= as.POSIXct("2023-06-15 22:14:40") |
gps_data$datetime >= as.POSIXct("2023-06-15 22:19:45") & gps_data$datetime <= as.POSIXct("2023-06-15 22:20:55")
# Labelizing data
gps_data <- gps_data %>%
mutate(tmode_manual = case_when(condition_walk ~ 'walk',
condition_tram ~ 'tram',
condition_bus ~ 'bus',
condition_boat ~ 'boat',
condition_run ~ 'run',
is.na(gps_data$tmode_manual) == TRUE ~ 'unclassified'
))
In the GPS tracker dataset, have been recorded the following transport modes, ordered from the highest number of points (n) to the lowest:
#count of of points per means of transports:
gps_tmode_count <- gps_data %>% group_by(tmode_manual) %>% count() %>% arrange(desc(n))
knitr::kable(gps_tmode_count %>% st_drop_geometry(),
caption = "Number of points labelized per mode of transport - GPS tracker")
tmode_manual | n |
---|---|
walk | 9746 |
tram | 7542 |
unclassified | 7343 |
bus | 929 |
run | 245 |
boat | 203 |
Merge GPS tracker and Posmo datasets
#here merge with gps tracker
mvmt_data <- rbind(posmo, gps_data)
col_order <- c("source", "datetime", "tmode_manual" ,"E", "N", "H", "geometry")
mvmt_data <- mvmt_data[, col_order]
# add regions (for visualisation)
#Zurich
zh_ll <- c(2674900, 1244000)
zh_ur <- c(2692100, 1260300)
#Waadt
wt_ll <- c(2480000, 1127600)
wt_ur <- c(2575200, 1195100)
#wallis
ws_ll <- c(2538000, 1076875)
ws_ur <- c(2661500, 1153125)
# Import the datasets
ch_boundaries <- read_sf(here(data_folder, "boundaries", "CH_boundaries.shp")) %>% st_zm()
Kanton_boundaries <- read_sf(here(data_folder, "boundaries", "Kanton_boundaries.shp")) %>% st_zm()
waadt <- Kanton_boundaries %>% filter(KANTONSNUM == 22)
zh_commune <- read_sf(here(data_folder, "boundaries", "ZH_gemeinde.shp")) %>% st_zm()
mvmt_data$region <- NA
mvmt_data <- mvmt_data |>
mutate(
region = case_when(
E > zh_ll[1] & E < zh_ur[1] & N > zh_ll[2] & N < zh_ur[2] ~ 'Zurich',
E > wt_ll[1] & E < wt_ur[1] & N > wt_ll[2] & N < wt_ur[2] ~ 'Waadt',
E > ws_ll[1] & E < ws_ur[1] & N > ws_ll[2] & N < ws_ur[2] ~ 'Wallis'
)
)
Here is the final total of recorded points and the distribution within the means of transport:
#count of points:
tmode_count <- mvmt_data %>% group_by(tmode_manual) %>% count() %>% arrange(desc(n))
knitr::kable(tmode_count %>% st_drop_geometry(),
caption = "Number of points labelized per mode of transport")
tmode_manual | n |
---|---|
unclassified | 19575 |
walk | 18321 |
tram | 9506 |
car | 9422 |
bus | 6009 |
train | 5075 |
bike | 2306 |
ski | 1795 |
ski_lift | 1148 |
plane | 465 |
boat | 288 |
run | 245 |
cable_car | 225 |
t_bar | 183 |
kick-scooter | 149 |
Map general overview - transport modes:
mvmt_data_classified <- mvmt_data %>% filter(tmode_manual != "unclassified")
ggplot() +
geom_sf(data = ch_boundaries) +
labs(title = "All recorded trajectory points") +
geom_point(data = mvmt_data_classified, aes(E, N, color = tmode_manual)) +
coord_sf(datum=2056) +
theme_bw()
Map local overview - Zurich:
mvmt_data_zh <- mvmt_data_classified %>% filter(region == "Zurich")
ggplot() +
geom_sf(data = zh_commune) +
labs(title = "Recorded trajectory points - Zurich city") +
geom_point(data = mvmt_data_zh, aes(E, N, color = tmode_manual)) +
coord_sf(datum=2056) +
theme_bw()
Map local overview - Waadt:
mvmt_data_waadt <- mvmt_data_classified %>% filter(region == "Waadt")
ggplot() +
geom_sf(data = waadt) +
labs(title = "Recorded trajectory points - Waadt kanton") +
geom_point(data = mvmt_data_waadt, aes(E, N, color = tmode_manual)) +
coord_sf(datum=2056) +
theme_bw()
The points will be segmentized based on the user, gaps in recording and the labelized transport mode. The static points will be detected and removed from the dataset. Finally, to have multiple segments for machine learning, the segments will again be segmentized per duration with a defined threshold.
Compute raw trajectory derivatives
We will need the timelag and steplength between each points. The timelag
will be used for detecting the gaps, and the steplength to find the
static points.
The points with a timelag of 0 have been removed after this step. After
inspection, it seems these are points created by posmo processing of the
data: At each change of transport mode, the point is duplicate with
different coordinates.
Detect gaps in recording
Posmo recording timerate is of 10 seconds, and the gps tracker of 5
sec. We will use a threshold value above these numbers which has been
decided for 30 seconds. We estimated under 30 seconds, the gaps are not
too important to define the derivative (max 3 missing points in posmo
and max. 6 in GPS tracker).
# 1) Compute raw trajectory derivatives
# Speed and steplength
mvmt_data <-
mvmt_data %>%
group_by(source) %>%
mutate(
timelag = as.numeric(difftime(lead(datetime), datetime)),
steplength_m = sqrt((E-lead(E))^2 + (N-lead(N))^2), # Horizontal steplength
steplength_h = H-lead(H), # Add vertical steplength
speed_ms = abs(steplength_m)/timelag, # Horizontal speed
speed_v = abs(steplength_h)/timelag, # Vertical speed
slope = (lead(H) - H)/steplength_m * 100
)
# Turning angle
trackAngle <- function(Ecol, Ncol){
coords <- data.matrix(cbind(Ecol, Ncol))
n <- length(Ecol)
if(n > 2){
angles <- abs(c(trackAzimuth(coords), 0) -
c(0, rev(trackAzimuth(coords[nrow(coords):1, ]))))
# angles <- ifelse(angles > 180, 360 - angles, angles) #[0,180] to [0,360]
angles[is.na(angles)] <- 180
return(c(0, angles[-c(1, length(angles))], 0))
}
else{
return(numeric(n))
}
}
#remove timelag 0 -> recording errors
mvmt_data <- mvmt_data %>% filter(timelag != 0)
# 2) Find gaps in the data
mvmt_data_gap <- mvmt_data |>
ungroup() |>
mutate(gap = timelag > 30)
Segmentation per gap, user and transport mode
A unique segment ID is attributed at each point based on the user,
transport mode, and gaps. Three individual indexes for those are
created. The points are traversed in user and datetime order, and at
each change of user or transport mode, the indexes are increased. For
the gap, the last point before the gap closes the current segment index.
Finally, the three indexes are combined to form unique segments.
Once the segments are created, the total duration could be calculated as well as meansteps at different window sizes. We looked at 5 points before and after the considered points, so we could calculate three mean values of the steplength: one small window (only 1 value before/after), one medium window (3 before/after), and one big window (5 before/after). These different steplength means will be used for the static calculation and to define the characteristics of transport modes.
# Increment an index at each change of value in the vec column.
# function source: GEO880 - Exercise3
rle_id <- function(vec) {
x <- rle(vec)$lengths
as.factor(rep(seq_along(x), times = x))
}
mvmt_data_segmentized <- mvmt_data_gap %>%
mutate(
segment_ID_user = rle_id(source),
segment_ID_gap = 1,
segment_ID_tmode = rle_id(tmode_manual)
)
# Gap: Increment segment_ID_gap for each TRUE
# Takes some time: to optimize
for (i in 2:nrow(mvmt_data_segmentized)) {
if (mvmt_data_segmentized$gap[i-1] == TRUE) {
mvmt_data_segmentized$segment_ID_gap[i] <- mvmt_data_segmentized$segment_ID_gap[i-1] + 1
} else {
mvmt_data_segmentized$segment_ID_gap[i] <- mvmt_data_segmentized$segment_ID_gap[i-1]
}
}
# ADD PRE-FINAL ID for segmentation
mvmt_data_segmentized <- mvmt_data_segmentized %>%
mutate(
segment_ID_both = paste(segment_ID_user, segment_ID_gap, segment_ID_tmode, sep = "-")
)
# Add duration per segment
mvmt_data_segmentized <- mvmt_data_segmentized %>%
group_by(segment_ID_both) %>%
mutate(duration_secs = as.integer(difftime(max(datetime), min(datetime), units = "secs")))
# Calculate distances to previous/next points
mvmt_data_segmentized <- mvmt_data_segmentized |>
group_by(segment_ID_both) %>%
mutate(
nMinus5 = sqrt((lag(E, 5) - E)^2 + (lag(N, 5) - N)^2),
nMinus4 = sqrt((lag(E, 4) - E)^2 + (lag(N, 4) - N)^2), #...
nMinus3 = sqrt((lag(E, 3) - E)^2 + (lag(N, 3) - N)^2), #dist to pos -30/20s
nMinus2 = sqrt((lag(E, 2) - E)^2 + (lag(N, 2) - N)^2), #dist to pos -20/10s
nMinus1 = sqrt((lag(E, 1) - E)^2 + (lag(N, 1) - N)^2), #dist to pos -10/5s
nPlus1 = sqrt((E - lead(E, 1))^2 + (N - lead(N, 1))^2), #dist to pos +10/5s
nPlus2 = sqrt((E - lead(E, 2))^2 + (N - lead(N, 2))^2), #dist to pos +20/10s
nPlus3 = sqrt((E - lead(E, 3))^2 + (N - lead(N, 3))^2), #dist to pos +30/20s
nPlus4 = sqrt((E - lead(E, 4))^2 + (N - lead(N, 4))^2), #...
nPlus5 = sqrt((E - lead(E, 5))^2 + (N - lead(N, 5))^2),
)
# Calculate meanstep at three different windows: small, middle and big
mvmt_data_segmentized <- mvmt_data_segmentized |>
rowwise() |>
mutate(
stepMean_smallw = round(mean(c(nMinus1, nPlus1)),2),
stepMean_midw = round(mean(c(nMinus3, nMinus2, nMinus1, nPlus1, nPlus2, nPlus3)),2),
stepMean_bigw = round(mean(c(nMinus5, nMinus4, nMinus3, nMinus2, nMinus1, nPlus1, nPlus2, nPlus3, nPlus4, nPlus5)),2)
) |>
ungroup()
# Calculate turning angle of each point in each segment
mvmt_data_segmentized <- mvmt_data_segmentized %>%
group_by(segment_ID_both) %>%
mutate(turning_angle = trackAngle(E, N))
# Calculate acceleration for each point in each segment
mvmt_data_segmentized <- mvmt_data_segmentized %>%
group_by(segment_ID_both) %>%
mutate(acceleration_h = (speed_ms - lead(speed_ms))/timelag,
acceleration_v = (speed_v - lead(speed_v)/timelag))
# Remove NA with 0 in acceleration columns
mvmt_data_segmentized <- mvmt_data_segmentized %>%
mutate(
acceleration_h = replace(acceleration_h, is.na(acceleration_h), 0),
acceleration_v = replace(acceleration_v, is.na(acceleration_v), 0),
)
Determine static points, and remove static and small
segments
The static is calculated per segment, by comparing the mean steplength
of the big window to the one from the small window. This method was
chosen because some transport modes have variances in speed (stops in
public transport, or traffic jams, light stops, …). Therefore, a
comparison with the overall segment was not ideal.
Static points with a steplength of less than 5m were also considered as
statics.
Finally, segments with a duration of less than 30 seconds were
removed.
The information about static was still kept in the segments by
calculating a percentage of static, as this could be useful to determine
the mean of transport.
#determine static points: choose an average distance to use as the threshold
mvmt_data_add_static <- mvmt_data_segmentized |>
group_by(segment_ID_both) %>%
mutate(
static =
stepMean_bigw < mean(stepMean_smallw, na.rm=TRUE) |
steplength_m < 0.5,
static = ifelse(is.na(static), FALSE, static)
)
# Add percentage of static in each segment
mvmt_data_add_static <- mvmt_data_add_static |>
group_by(segment_ID_both) %>%
mutate(
static_percent = (sum(static))/(length(static))
# Optimize by only considering continuous static points
)
# Filter static and small segments
mvmt_data_filtered <- mvmt_data_add_static %>%
group_by(segment_ID_both, source, tmode_manual, duration_secs, static, static_percent) %>%
# remove static segments
filter(static == FALSE) %>%
#remove the too small segments (duration < 30 s)
filter(duration_secs > 30)
Final segmentation per duration
In order to have more segments of each transport modes for the machine
learning, the long segments were segmentized based on the duration. The
maximum duration considered was 100 seconds. However, to not create very
small parts, when the last resegmentized part was smaller than 50
seconds, this was added to the penultimate part.
# Segmentize per duration
mvmt_data_split <- mvmt_data_filtered %>%
group_by(segment_ID_both) %>%
mutate(
#remove timelag to next segment
timelag = case_when(timelag > 30 ~ 0,
timelag <= -20 ~ 0,
.default = timelag),
duration_index = case_when(
(duration_secs > 100) & (duration_secs %% 100) >= 50 ~ ceiling(cumsum(timelag)/100),
(duration_secs > 100) & (duration_secs %% 100) < 50 ~ ceiling((cumsum(timelag)/100)-1),
.default = 0
),
segment_ID_final = paste(segment_ID_both, duration_index, sep = "-")
)
# Remove unwanted columns
mvmt_seg_final <- mvmt_data_split %>%
dplyr::select(-c("segment_ID_user", "segment_ID_both", "segment_ID_gap", "duration_index", "segment_ID_tmode", "nMinus5", "nMinus4", "nMinus3", "nMinus2", "nMinus1", "nPlus1", "nPlus2", "nPlus3", "nPlus4", "nPlus5"
))
Here is the summary of the resulting number of segments, ordered from the most to last:
# Count no of segs per means of transports:
mvmt_seg_count <- mvmt_seg_final %>% st_drop_geometry() %>% group_by(segment_ID_final, tmode_manual) %>% summarize() %>% group_by(tmode_manual) %>% count() %>% arrange(desc(n))
knitr::kable(mvmt_seg_count,
caption = "Number of segments per mode of transport")
tmode_manual | n |
---|---|
unclassified | 1349 |
walk | 1234 |
car | 813 |
tram | 526 |
train | 465 |
bus | 437 |
bike | 194 |
ski | 137 |
ski_lift | 107 |
plane | 42 |
cable_car | 21 |
boat | 20 |
t_bar | 18 |
run | 16 |
kick-scooter | 13 |
Segments visualisation - example
Here we take an example with both posmo and gps data, at different steps of the analysis.
# MAP VISUALISATIONS
# Final segments per transport modes
visu_data <- mvmt_seg_final %>% filter(as.Date(datetime) == "2023-05-16")
n_tmode <- visu_data$tmode_manual %>% unique() %>% length()
visu_data_wgs <- visu_data %>% st_transform(crs = 4326)
set.seed(2)
factpal <- colorFactor(topo.colors(n_tmode), domain =visu_data_wgs$tmode_manual)
m <- leaflet() %>%
#addTiles() %>% # Add default OpenStreetMap map tiles
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data_wgs,
#opacity = 0.3,
radius = 0.2,
popup = paste("seg_ID: ", visu_data_wgs$segment_ID_final),
color = ~factpal(tmode_manual)) %>%
addLegend(position = 'topright',
#colors = c('red'),
#labels = c('static'),
pal = factpal,
values = visu_data_wgs$tmode_manual,
title = '2023-05-16: Final segments per transport mode')
## Plot the leaflet object m
m
# Gaps
visu_gaps_static <- mvmt_data_add_static %>% filter(as.Date(datetime) == "2023-05-16")
visu_gaps_static |>
ggplot(aes(E, N, color = gap)) +
#geom_path() +
geom_point() +
coord_fixed() +
coord_equal() +
labs(title = "2023-05-16: Identified gaps") +
theme(legend.position = "right")
# Static
visu_gaps_static |>
ggplot(aes(E, N, color = static)) +
#geom_path() +
geom_point() +
coord_fixed() +
coord_equal() +
labs(title = "2023-05-16: Identified static points") +
theme(legend.position = "right")
# Final segments
visu_data |>
ggplot(aes(E, N, color = segment_ID_final)) +
#geom_path() +
geom_point() +
coord_fixed() +
coord_equal() +
labs(title = "2023-05-16: Final segments") +
theme(legend.position = "right")
After assigning unique ID for every segments, we then calculated the following characteristics of points in each trajectory:
We summarized derivatives for each trajectory by calculating mean value, standard deviation, and range of the aforementioned characteristics.
# SUMMARIZE
mvmt_seg_summary <- mvmt_seg_final %>%
group_by(segment_ID_final, static_percent, duration_secs, tmode_manual) %>%
summarize(
# Total segments
#time_duration = duration_secs,
space_duration = sqrt((sum(abs(steplength_m))^2 + (sum(abs(steplength_h))^2))),
stepMean_midw_mean = mean(stepMean_midw, na.rm=TRUE),
# Step length
steplength_mean = mean(steplength_m),
steplength_sd = sd(steplength_m),
# Horizontal speed
speed_h_mean = mean(speed_ms),
speed_h_sd = sd(speed_ms),
speed_h_range = max(speed_ms) - min(speed_ms),
# Vertical speed
speed_v_mean = mean(speed_v),
speed_v_sd = sd(speed_v),
speed_v_range = max(speed_v) - min(speed_v),
# Horizontal Acceleration
a_h_mean = mean(acceleration_h),
a_h_sd = sd(acceleration_h),
a_h_range = max(acceleration_h) - min(acceleration_h),
# Vertical Acceleration
a_v_mean = mean(acceleration_v),
a_v_sd = sd(acceleration_v),
a_v_range = max(acceleration_v) - min(acceleration_v),
# Sinuosity
ta_mean = mean(turning_angle),
ta_sd = sd(turning_angle),
ta_range = max(turning_angle) - min(turning_angle),
# Geometry
lst_geometry = list(geometry))
Each point will be annotated with the information TRUE/FALSE of its
proximity to the following environment elements:
- Railways (train)
- Trams (rails)
- Bus stops
- Highways (“Autobahn” and “Autostrasse”)
- Cable-cars, ski-lifts and t-bars
- Lakes
- Mountainous area (altitude)
The railways for the trains and trams come from map.geoadmin.ch. The
lakes, bus stops, highways and cable-cars & co comes from SwissTLM3D
(swisstopo). Due to the number of water elements, only the biggest lakes
have been considered. For the highways, it was decided to only take this
category of road because this was the most helpful to separate with the
other transport modes. Classical roads, in addition of being all other
the territory, allow other transport modes such as bikes or walking. Bus
networks lines were not available from swisstopo for downloading for
whole Switzerland coverage, therefore the stops points were used.
The distance considered for proximity has been chosen individually for
each transport mode, based on inspection of all the points labelized in
the transport mode. Finally, the altitude simply used the heights of the
points and applies a threshold. The limit of 1200m has been chosen, as
it has been observed that most ski stations in the country are above
this limit.
# Calculate concave hull based on the trajectory points
concave_hull <- concaveman(mvmt_seg_final, concavity = 2, length_threshold=50) %>% st_buffer(500)
# Join the trajectory points with the environment datasets, by using the concave hull on the shp and to limit calculation time. Dissolve and simplification also reduce the calculation time.
# 1) RAILS
rails_shp <- read_sf(here(data_folder, "railways", "rails.shp")) %>% st_zm() %>% st_union() %>% st_as_sf() %>% st_intersection(concave_hull) %>% mutate(environment = "rails") %>% st_simplify(dTolerance = 1)
joined_data <- mvmt_seg_final %>%
st_join(rails_shp, join = st_is_within_distance, 30)
mvmt_seg_final$isClosetoRails <- !is.na(joined_data$environment)
# 2) TRAMS
trams_shp <- read_sf(here(data_folder, "railways", "trams.shp")) %>% st_zm() %>% st_union() %>% st_as_sf() %>% st_intersection(concave_hull) %>% mutate(environment = "trams") %>% st_simplify(dTolerance = 1)
joined_data <- mvmt_seg_final %>%
st_join(trams_shp, join = st_is_within_distance, 20)
mvmt_seg_final$isClosetoTrams <- !is.na(joined_data$environment)
# 3) BUS STOPS
bus_shp <- read_sf(here(data_folder, "stops", "bus_stops.shp")) %>% st_zm() %>% st_union() %>% st_as_sf() %>% st_intersection(concave_hull) %>% mutate(environment = "bus")
joined_data <- mvmt_seg_final %>%
st_join(bus_shp, join = st_is_within_distance, 50)
mvmt_seg_final$isClosetoBus <- !is.na(joined_data$environment)
# 4) HIGHWAYS
# Highways have first been extracted from the whole swissTLM3D roads datasets by selecting "Autobahn" and "Autostrasse". This is not in the script, due to the huges amount of segments.
roads_shp <- read_sf(here(data_folder, "roads", "roads_dissolved.shp")) %>% st_zm() %>% st_intersection(concave_hull) %>% mutate(environment = "cars") %>% st_simplify(dTolerance = 1)
joined_data <- mvmt_seg_final %>%
st_join(roads_shp, join = st_is_within_distance, 20)
mvmt_seg_final$isClosetoHighways <- !is.na(joined_data$environment)
# 5) LAKES
# Larges lakes have been preselected by defining a threshold of surface ("surface">=10000000)
lakes_shp <- read_sf(here(data_folder, "lakes", "large_lakes.shp")) %>% st_zm() %>% st_union() %>% st_as_sf() %>% st_intersection(concave_hull) %>% mutate(environment = "lakes") %>% st_simplify(dTolerance = 5)
joined_data <- mvmt_seg_final %>% st_join(lakes_shp, join = st_is_within_distance, 1)
mvmt_seg_final$isClosetoLakes <- !is.na(joined_data$environment)
# 6) SKI LIFTS AND CABLE CARS
cables_shp <- read_sf(here(data_folder, "ski_lift_cable_car", "skilift_cablecar_raw.shp")) %>% st_zm() %>% st_union() %>% st_as_sf() %>% st_transform(crs=2056) %>% st_intersection(concave_hull) %>% mutate(environment = "cable_cars") %>% st_simplify(dTolerance = 1)
joined_data <- mvmt_seg_final %>%
st_join(cables_shp, join = st_is_within_distance, 10)
mvmt_seg_final$isClosetoCables <- !is.na(joined_data$environment)
# 7) HIGH ALTITUDES
mvmt_seg_final <- mvmt_seg_final %>% mutate(
isHighAltitude = case_when(H >= 1200.0 ~ TRUE,
.default = FALSE)
)
Once the points have been annotated, a proximity for the whole segments was defined. A threshold based on percentage of proximity was defined, again individually for each mode to optimize the correct classification.
# SUMMARIZE ANNOTATIONS
mvmt_seg_annotations_summarized <- mvmt_seg_final %>%
group_by(segment_ID_final, static_percent, duration_secs, tmode_manual) %>%
summarize(
isClosetoLakes = (sum(isClosetoLakes) / n() * 100 ) > 0,
isClosetoRails = (sum(isClosetoRails) / n() * 100 ) > 70.0,
isClosetoBus = (sum(isClosetoBus) / n() * 100 ) > 30.0,
isClosetoTrams = (sum(isClosetoTrams) / n() * 100 ) > 70.0,
isClosetoHighways = (sum(isClosetoHighways) / n() * 100 ) > 80.0,
isClosetoCables = (sum(isClosetoCables) / n() * 100 ) > 80.0,
isHighAltitude = (sum(isHighAltitude) / n() * 100 ) > 80.0
)
Visualisation of the environment datasets:
# Rails
ggplot() +
geom_sf(data = rails_shp) +
labs(title = "Railways - Proximity of the recorded trajectory points") +
geom_point(data = mvmt_seg_final, aes(E, N, color = isClosetoRails), size=0.5, alpha=0.5) +
coord_sf(datum=2056) +
theme_bw()
# Trams
ggplot() +
geom_sf(data = trams_shp) +
labs(title = "Trams - Proximity of the recorded trajectory points") +
geom_point(data = mvmt_seg_final, aes(E, N, color = isClosetoTrams), size=0.5, alpha=0.5) +
coord_sf(datum=2056) +
theme_bw()
# Bus stops
ggplot() +
geom_sf(data = bus_shp, size=0.5) +
labs(title = "Bus stops - Proximity of the recorded trajectory points") +
geom_point(data = mvmt_seg_final, aes(E, N, color = isClosetoBus), size=1, alpha=0.5) +
coord_sf(datum=2056) +
theme_bw()
# Highways
ggplot() +
geom_sf(data = roads_shp) +
labs(title = "Highways - Proximity of the recorded trajectory points") +
geom_point(data = mvmt_seg_final, aes(E, N, color = isClosetoHighways), size=0.5, alpha=0.5) +
coord_sf(datum=2056) +
theme_bw()
# Lakes
ggplot() +
geom_sf(data = lakes_shp) +
labs(title = "Lakes - Proximity of the recorded trajectory points") +
geom_point(data = mvmt_seg_final, aes(E, N, color = isClosetoLakes), size=0.5, alpha=0.5) +
coord_sf(datum=2056) +
theme_bw()
# Ski lifts and cable cars
ggplot() +
geom_sf(data = cables_shp) +
labs(title = "Cable car and ski lifts - Proximity of the recorded trajectory points") +
geom_point(data = mvmt_seg_final, aes(E, N, color = isClosetoCables), size=0.5, alpha=0.5) +
coord_sf(datum=2056) +
theme_bw()
Visualisation - university day
On the map below in Zurich, can be observed trams, bus and walk travel
modes.
# Visu proximity to environment for specific days
# Zurich - trams, bus and walk
visu_data <- mvmt_seg_final %>% filter(as.Date(datetime) == "2023-05-16") %>% st_transform(crs=4326)
# Cut environment data for vizualisation
c_hull <- concaveman(visu_data, concavity = 2) %>% st_buffer(500)
trams_c <- trams_shp %>% st_transform(crs = 4326) %>% st_intersection(c_hull)
bus_c <- bus_shp %>% st_transform(crs = 4326) %>% st_intersection(c_hull) %>% st_cast("POINT")
visu_bus <- visu_data %>% filter(isClosetoBus)
visu_tram <- visu_data %>% filter(isClosetoTrams)
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addPolylines(data = trams_c,
color = "black",
opacity = 0.5) %>%
addCircleMarkers(data = bus_c,
color = "#696969",
radius = 10,
fillOpacity = 0.0) %>%
addCircleMarkers(data = visu_data,
radius = 0.2,
popup = paste(
" tmode: ", visu_data$tmode_manual
),
color = "#DAA520") %>%
addCircleMarkers(data = visu_bus,
opacity = 0.8,
radius = 5,
popup = paste(
" tmode: ", visu_bus$tmode_manual
),
color = "#0000FF") %>%
addCircleMarkers(data = visu_tram,
opacity = 0.8,
radius = 1,
popup = paste(
" tmode: ", visu_tram$tmode_manual
),
color = "#FF0000") %>%
addLegend(position = 'topright',
colors = c("#DAA520", "#0000FF", "#FF0000", "#696969", "black"),
labels = c('All points', "isCloseToBus", "isCloseToTrams", "Circle: bus stops", "Lines: trams"),
title = '2023-05-16: Proximity - Lecture day')
## Plot the leaflet object m
m
Visualisation - ski day
On this map below in Grimentz, Wallis, can be seen the travel modes ski,
cable-car, ski-lift and t-bar.
# Visu proximity to environment for specific days
# Grimentz - ski, ski-lift, cable-car, t-bar
visu_data <- mvmt_seg_final %>% filter(as.Date(datetime) == "2023-04-13") %>% st_transform(crs=4326)
# Cut environment data for vizualisation
c_hull <- concaveman(visu_data, concavity = 2) %>% st_buffer(1000)
cables_c <- cables_shp %>% st_transform(crs = 4326) %>% st_intersection(c_hull)
visu_cables <- visu_data %>% filter(isClosetoCables)
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addPolylines(data = cables_c,
color = "black",
opacity = 0.5) %>%
addCircleMarkers(data = visu_data,
radius = 0.2,
popup = paste(
" tmode: ", visu_data$tmode_manual
),
color = "#DAA520") %>%
addCircleMarkers(data = visu_cables,
opacity = 0.8,
radius = 1,
popup = paste(
" tmode: ", visu_cables$tmode_manual
),
color = "#0000FF") %>%
addLegend(position = 'topright',
colors = c("#DAA520", "#0000FF", "black"),
labels = c('All points', "isClosetoCables", "Lines: cable car, ski-lifts, t-bars"),
title = '2023-04-13: Proximity - ski day')
## Plot the leaflet object m
m
In this part, we performed statistical analysis on each trajectory derivative in order to answer the first research question, which is to see whether movement characteristics differ between different transport modes.
For all the 28 derivatives calculated, we applied pairwise t-test to every two transport modes. And based on the result of t-test, we then could tell if one derivative is significantly different between two transport modes. We then calculated the percentage of significantly different groups among all groups.
According to the following table, the mean percentage of significant groups is 47%, and 15 derivatives have a significant percent more than 50%. The top five derivatives which differs most significantly between transport modes are mean vertical acceleration (a_v_mean), mean vertical speed (speed_v_mean), percentage of static points (static_percent), range of vertical acceleration (a_v_range), standard deviation of vertical acceleration (a_v_sd).
Based on the result, we could say most derivatives we calculated from movement characteristics differ significantly between transport mode. And therefore, we believe using these derivatives to identify travel mode is feasible.
# extrafont::font_import()
# Clean data for statistics
mvmt_seg_annotations_summarized_value <- mvmt_seg_final %>%
group_by(segment_ID_final, static_percent, duration_secs, tmode_manual) %>%
summarize(
isClosetoLakes = sum(isClosetoLakes) / n() * 100,
isClosetoRails = sum(isClosetoRails) / n() * 100,
isClosetoBus = sum(isClosetoBus) / n() * 100,
isClosetoTrams = sum(isClosetoTrams) / n() * 100,
isClosetoHighways = sum(isClosetoHighways) / n() * 100 ,
isClosetoCables = sum(isClosetoCables) / n() * 100,
isHighAltitude = sum(isHighAltitude) / n() * 100
) %>%
st_drop_geometry()
# Merge result of derivative and annotation
mvmt_sta <- merge(st_drop_geometry(mvmt_seg_summary),
mvmt_seg_annotations_summarized_value,
by = c('segment_ID_final', 'tmode_manual',
'duration_secs', 'static_percent'))
# Remove 'unclassified' class and 'lst_geometry' column
mvmt_sta <- mvmt_sta %>%
dplyr::select(-c("lst_geometry")) %>%
filter(tmode_manual != "unclassified")
# Remove NA
mvmt_sta <- na.omit(mvmt_sta)
# T test for each derivative
ttesttable <- data.frame(matrix(nrow = 30, ncol = 5))
rownames(ttesttable) <- colnames(mvmt_sta)
colnames(ttesttable) <- c("*", "**", "***", "****", "ns")
ttesttable <- tail(ttesttable, -2)
index <- data.frame(matrix(nrow = 5, ncol = 2))
index[,1] <- c("*", "**", "***", "****", "ns")
index[,2] <- numeric(5)
colnames(index) <- c("p.signif", "n")
pwc <- mvmt_sta %>%
pairwise_t_test(duration_secs ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[1,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(static_percent ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[2,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(space_duration ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[3,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(stepMean_midw_mean ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[4,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(steplength_mean ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[5,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(steplength_sd ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[6,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(speed_h_mean ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[7,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(speed_h_sd ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[8,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(speed_h_range ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[9,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(speed_v_mean ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[10,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(speed_v_sd ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[11,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(speed_v_range ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[12,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(a_h_mean ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[13,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(a_h_sd ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[14,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(a_h_range ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[15,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(a_v_mean ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[16,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(a_v_sd ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[17,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(a_v_range ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[18,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(ta_mean ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[19,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(ta_sd ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[20,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(ta_range ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[21,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(isClosetoLakes ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[22,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(isClosetoRails ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[23,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(isClosetoBus ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[24,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(isClosetoTrams ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[25,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(isClosetoHighways ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[26,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(isClosetoCables ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[27,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(isHighAltitude ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[28,] <- b$n
ttesttable$significant_percent <- (ttesttable$`*`
+ ttesttable$`**`
+ ttesttable$`***`
+ ttesttable$`****`)/
(ttesttable$`*` + ttesttable$`**` + ttesttable$`***` +
ttesttable$`****` + ttesttable$ns)
ttesttable <- ttesttable[order(ttesttable$significant_percent,decreasing=TRUE),]
knitr::kable(ttesttable,
caption = "Pairwise t-test result and significant percent")
* | ** | *** | **** | ns | significant_percent | |
---|---|---|---|---|---|---|
a_v_mean | 3 | 2 | 4 | 60 | 22 | 0.7582418 |
speed_v_mean | 1 | 2 | 3 | 58 | 27 | 0.7032967 |
static_percent | 5 | 12 | 9 | 35 | 30 | 0.6703297 |
a_v_range | 5 | 2 | 2 | 52 | 30 | 0.6703297 |
a_v_sd | 4 | 1 | 4 | 51 | 31 | 0.6593407 |
speed_v_sd | 4 | 11 | 6 | 38 | 32 | 0.6483516 |
speed_v_range | 5 | 9 | 8 | 37 | 32 | 0.6483516 |
ta_range | 6 | 7 | 11 | 35 | 32 | 0.6483516 |
ta_sd | 5 | 4 | 5 | 41 | 36 | 0.6043956 |
ta_mean | 6 | 2 | 4 | 40 | 39 | 0.5714286 |
duration_secs | 5 | 3 | 4 | 39 | 40 | 0.5604396 |
isClosetoBus | 7 | 10 | 2 | 31 | 41 | 0.5494505 |
isHighAltitude | 1 | 2 | 2 | 45 | 41 | 0.5494505 |
speed_h_mean | 3 | 2 | 1 | 41 | 44 | 0.5164835 |
isClosetoTrams | 6 | 8 | 4 | 28 | 45 | 0.5054945 |
stepMean_midw_mean | 1 | 1 | 2 | 40 | 47 | 0.4835165 |
isClosetoCables | 0 | 0 | 0 | 43 | 48 | 0.4725275 |
isClosetoRails | 4 | 7 | 4 | 24 | 52 | 0.4285714 |
space_duration | 0 | 7 | 6 | 18 | 60 | 0.3406593 |
steplength_mean | 6 | 9 | 2 | 14 | 60 | 0.3406593 |
isClosetoLakes | 0 | 0 | 0 | 25 | 66 | 0.2747253 |
speed_h_range | 6 | 7 | 3 | 6 | 69 | 0.2417582 |
speed_h_sd | 5 | 8 | 2 | 6 | 70 | 0.2307692 |
a_h_range | 7 | 3 | 4 | 6 | 71 | 0.2197802 |
a_h_sd | 7 | 4 | 4 | 3 | 73 | 0.1978022 |
isClosetoHighways | 1 | 0 | 0 | 13 | 77 | 0.1538462 |
steplength_sd | 1 | 0 | 3 | 4 | 83 | 0.0879121 |
a_h_mean | 3 | 3 | 1 | 1 | 83 | 0.0879121 |
Two different supervised algorithms will be tested:
- K-Nearest Neighbors
- Random forest (mutliple Decision Tree)
We suspect random forest will perform better, as this algorithm is more suitable to categorical data.
Common issues in machine learning: imbalance training sets (not much boat against walk segments)
The segments annotated with trajectory derivatives and environment will be splitted into training and testing parts. We will validate the model by doing a K-fold cross validation.
# Drop geometry
# Merge all variables
mvmt_all <- merge(st_drop_geometry(mvmt_seg_summary),
st_drop_geometry(mvmt_seg_annotations_summarized),
by = c('segment_ID_final', 'tmode_manual',
'duration_secs', 'static_percent'))
# Filter the 'unclassified' records
mvmt_all <- mvmt_all %>% filter(tmode_manual != "unclassified")
# Remove
mvmt_all <- na.omit(mvmt_all)
# Label travel mode
mvmt_all$label <- as.factor(mvmt_all$tmode_manual)
# Convert boolean to 0/1 value
mvmt_all$isClosetoLakes <- as.integer(as.logical(mvmt_all$isClosetoLakes))
mvmt_all$isClosetoRails <- as.integer(as.logical(mvmt_all$isClosetoRails))
mvmt_all$isClosetoBus <- as.integer(as.logical(mvmt_all$isClosetoBus))
mvmt_all$isClosetoTrams <- as.integer(as.logical(mvmt_all$isClosetoTrams))
mvmt_all$isClosetoHighways <- as.integer(as.logical(mvmt_all$isClosetoHighways))
mvmt_all$isClosetoCables <- as.integer(as.logical(mvmt_all$isClosetoCables))
mvmt_all$isHighAltitude <- as.integer(as.logical(mvmt_all$isHighAltitude))
# For machine learning, select factor columns only
# Keep segmentID to for visualisation later, drop after the split in train/test data
mvmt_all <- mvmt_all %>% dplyr::select(c("segment_ID_final", "static_percent", "space_duration",
"stepMean_midw_mean", "steplength_mean",
"steplength_sd",
"speed_h_mean", "speed_h_sd",
"speed_h_range", "speed_v_mean",
"speed_v_sd", "speed_v_range",
"a_h_mean", "a_h_sd", "a_h_range",
"a_v_mean", "a_v_sd", "a_v_range",
"ta_mean","ta_sd", "ta_range",
"isClosetoLakes","isClosetoRails",
"isClosetoBus", "isClosetoTrams",
"isClosetoHighways", "isClosetoCables",
"isHighAltitude", "label"))
# Split into training and testing dataset
split <- sample.split(mvmt_all, SplitRatio = 0.3)
mvmt_all_test_g <- subset(mvmt_all, split==TRUE)
mvmt_all_train_g <- subset(mvmt_all, split==FALSE)
mvmt_all_test <- mvmt_all_test_g %>% dplyr::select(-c("segment_ID_final"))
mvmt_all_train <- mvmt_all_train_g %>% dplyr::select(-c("segment_ID_final"))
# For validation
# for 10-fold cross validation
k_fold <- 10
n3 <- nrow(mvmt_all_train)
n_tail <- n3%/%k_fold
rnd_n <- runif(n3)
rank_n <- rank(rnd_n)
chunk <- (rank_n - 1)%/%n_tail + 1
chunk <- as.factor(chunk)
mvmt_all_train_chunk <- mvmt_all_train
mvmt_all_train_chunk$chunk <- chunk
The classification will also be tried without the environment annotations features to see if how good the classification could still be or not be, with only trajectory derivative.
# Create test/train sets without environment annotation
mvmt_all_test_nprox <- mvmt_all_test %>% dplyr::select(-c("isClosetoLakes", "isClosetoRails", "isClosetoBus","isClosetoTrams", "isClosetoHighways", "isClosetoCables", "isHighAltitude"))
mvmt_all_train_nprox <- mvmt_all_train %>% dplyr::select(-c("isClosetoLakes", "isClosetoRails", "isClosetoBus","isClosetoTrams", "isClosetoHighways", "isClosetoCables", "isHighAltitude"))
# For validation
# for 10-fold cross validation
k_fold <- 10
n3 <- nrow(mvmt_all_train_nprox)
n_tail <- n3%/%k_fold
rnd_n <- runif(n3)
rank_n <- rank(rnd_n)
chunk <- (rank_n - 1)%/%n_tail + 1
chunk <- as.factor(chunk)
mvmt_all_train_nprox_chunk <- mvmt_all_train_nprox
mvmt_all_train_nprox_chunk$chunk <- chunk
Below, is the distribution of the segments within the train and validation sets. Some transport modes do not have a lot of segments (kick-scooter, run, t_bar). The algorithms might have some difficulties with it (imbalance training sets).
# n. segment per tmode in the test set
allset = mvmt_all %>% group_by(label) %>% count()
test_set_count <- mvmt_all_test %>% group_by(label) %>% count()
train_set_count <- mvmt_all_train %>% group_by(label) %>% count()
n_seg_train_test_set <- data.frame(
labels = allset$label,
allset = allset$n,
train_set = train_set_count$n,
test_set = test_set_count$n
)
knitr::kable(n_seg_train_test_set, caption = "Segments in test and training sets")
labels | allset | train_set | test_set |
---|---|---|---|
bike | 188 | 139 | 49 |
boat | 16 | 11 | 5 |
bus | 412 | 294 | 118 |
cable_car | 20 | 14 | 6 |
car | 790 | 576 | 214 |
kick-scooter | 13 | 8 | 5 |
plane | 42 | 30 | 12 |
run | 15 | 10 | 5 |
ski | 133 | 95 | 38 |
ski_lift | 104 | 76 | 28 |
t_bar | 18 | 14 | 4 |
train | 430 | 307 | 123 |
tram | 500 | 363 | 137 |
walk | 1195 | 870 | 325 |
Knn is non linear supervised classifier. It works by assigning the major class label from the closest labelized neighbours to the unlabelized records.
The main parameter to set for Knn is “k” the number of neighbours considered. Different sets of number will be tested.
# Try kv = 5, 11, 50 and 5 is the best Try kv = 3, 4, 5, 7 and 3 is the best Try kv = 1, 2, 3 and 1 is the best
# Build models with 3 parameter sets
for(m in 1:3){
knn_train_cva <- numeric(0)
knn_test_cva <- numeric(0)
if (m == 1){
kv= 1
}
if (m == 2){
kv= 2
}
if (m == 3){
kv= 3
}
for(i in 1:k_fold){
set.seed(21)
knn_train_pred <- knn(train = dplyr::select(mvmt_all_train_chunk[chunk != i, ], -label, -chunk),
test = dplyr::select(mvmt_all_train, -label),
cl = mvmt_all_train_chunk[chunk != i, ]$label, k=kv)
knn_test_pred <- knn(train = dplyr::select(mvmt_all_train_chunk[chunk != i, ], -label, -chunk),
test = dplyr::select(mvmt_all_test, -label),
cl = mvmt_all_train_chunk[chunk != i, ]$label, k=kv)
knn_train_cva <- rbind(knn_train_cva, Accuracy(knn_train_pred, mvmt_all_train$label))
knn_test_cva <- rbind(knn_test_cva, Accuracy(knn_test_pred, mvmt_all_test$label))
}
#print(knn_train_cva)
#print(knn_test_cva)
if (m == 1){
knn_train_pred1 <- knn_train_pred
knn_test_pred1 <- knn_test_pred
knn_train_cva1 <- knn_train_cva
knn_test_cva1 <- knn_test_cva
knn_train_cva_mean1<-mean(knn_train_cva)
knn_test_cva_mean1<-mean(knn_test_cva)
}
if (m == 2){
knn_train_pred2 <- knn_train_pred
knn_test_pred2 <- knn_test_pred
knn_train_cva2 <- knn_train_cva
knn_test_cva2 <- knn_test_cva
knn_train_cva_mean2<-mean(knn_train_cva)
knn_test_cva_mean2<-mean(knn_test_cva)
}
if (m == 3){
knn_train_pred3 <- knn_train_pred
knn_test_pred3 <- knn_test_pred
knn_train_cva3 <- knn_train_cva
knn_test_cva3 <- knn_test_cva
knn_train_cva_mean3<-mean(knn_train_cva)
knn_test_cva_mean3<-mean(knn_test_cva)
}
}
#plot(mvmt_all$label)
# Create dataframe comparison of the tested models
knn_tests <- c(1, 2, 3)
knn_train_cva_mean <- c(round(knn_train_cva_mean1, 2), round(knn_train_cva_mean2,2), round(knn_train_cva_mean3,2))
knn_test_cva_mean <- c(round(knn_test_cva_mean1,2), round(knn_test_cva_mean2,2), round(knn_test_cva_mean3,2))
res_model_knn <- data.frame(model_no = knn_tests, mean_train = knn_train_cva_mean, mean_test = knn_test_cva_mean)
knitr::kable(res_model_knn, caption = "Knn - models Accuracy on train and test data")
model_no | mean_train | mean_test |
---|---|---|
1 | 0.97 | 0.69 |
2 | 0.84 | 0.69 |
3 | 0.82 | 0.69 |
Based on the accuracy of each model for train/test sets, the first model with 3 nearest neighbours seems to perform best both on train and test sets. This model will then be kept for further analysis.
Confusion matrix - knn
The confusion matrix below indicates the correct classification of the segments in the tmode categories.
We can see Knn had some confusion between car and train.
# Confusion matrix
cm_knn <- caret::confusionMatrix(knn_test_pred1, mvmt_all_test$label)
print(cm_knn)
## Confusion Matrix and Statistics
##
## Reference
## Prediction bike boat bus cable_car car kick-scooter plane run ski ski_lift
## bike 36 0 6 1 2 1 0 0 10 2
## boat 1 1 1 0 0 0 0 0 0 0
## bus 1 2 50 2 18 0 0 0 10 1
## cable_car 1 0 0 2 0 0 0 0 0 4
## car 0 0 12 0 146 1 0 0 7 0
## kick-scooter 0 0 0 0 0 0 0 0 0 1
## plane 0 0 0 0 0 0 10 0 0 0
## run 0 0 0 0 0 0 0 1 0 0
## ski 3 0 16 0 2 1 0 1 5 0
## ski_lift 4 0 1 1 0 0 0 0 1 20
## t_bar 0 0 0 0 0 0 0 0 0 0
## train 0 1 0 0 37 0 2 0 0 0
## tram 1 1 26 0 9 2 0 0 4 0
## walk 2 0 6 0 0 0 0 3 1 0
## Reference
## Prediction t_bar train tram walk
## bike 1 0 7 2
## boat 0 0 1 1
## bus 0 3 23 2
## cable_car 0 0 0 0
## car 0 52 6 2
## kick-scooter 0 0 1 0
## plane 0 2 0 0
## run 0 0 0 1
## ski 0 0 4 0
## ski_lift 1 0 1 0
## t_bar 1 0 0 0
## train 0 63 0 0
## tram 1 0 87 2
## walk 0 3 7 315
##
## Overall Statistics
##
## Accuracy : 0.6894
## 95% CI : (0.6607, 0.7171)
## No Information Rate : 0.304
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6205
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: bike Class: boat Class: bus Class: cable_car
## Sensitivity 0.73469 0.2000000 0.42373 0.333333
## Specificity 0.96863 0.9962406 0.93481 0.995296
## Pos Pred Value 0.52941 0.2000000 0.44643 0.285714
## Neg Pred Value 0.98701 0.9962406 0.92894 0.996234
## Prevalence 0.04584 0.0046773 0.11038 0.005613
## Detection Rate 0.03368 0.0009355 0.04677 0.001871
## Detection Prevalence 0.06361 0.0046773 0.10477 0.006548
## Balanced Accuracy 0.85166 0.5981203 0.67927 0.664315
## Class: car Class: kick-scooter Class: plane Class: run
## Sensitivity 0.6822 0.000000 0.833333 0.2000000
## Specificity 0.9064 0.998120 0.998108 0.9990602
## Pos Pred Value 0.6460 0.000000 0.833333 0.5000000
## Neg Pred Value 0.9193 0.995314 0.998108 0.9962512
## Prevalence 0.2002 0.004677 0.011225 0.0046773
## Detection Rate 0.1366 0.000000 0.009355 0.0009355
## Detection Prevalence 0.2114 0.001871 0.011225 0.0018709
## Balanced Accuracy 0.7943 0.499060 0.915721 0.5995301
## Class: ski Class: ski_lift Class: t_bar Class: train
## Sensitivity 0.131579 0.71429 0.2500000 0.51220
## Specificity 0.973812 0.99135 1.0000000 0.95772
## Pos Pred Value 0.156250 0.68966 1.0000000 0.61165
## Neg Pred Value 0.968177 0.99231 0.9971910 0.93789
## Prevalence 0.035547 0.02619 0.0037418 0.11506
## Detection Rate 0.004677 0.01871 0.0009355 0.05893
## Detection Prevalence 0.029935 0.02713 0.0009355 0.09635
## Balanced Accuracy 0.552695 0.85282 0.6250000 0.73496
## Class: tram Class: walk
## Sensitivity 0.63504 0.9692
## Specificity 0.95064 0.9704
## Pos Pred Value 0.65414 0.9347
## Neg Pred Value 0.94658 0.9863
## Prevalence 0.12816 0.3040
## Detection Rate 0.08138 0.2947
## Detection Prevalence 0.12442 0.3152
## Balanced Accuracy 0.79284 0.9698
# Models without proximities
# Build models with 3 parameter sets
for(m in 1:3){
knn_train_cva <- numeric(0)
knn_test_cva <- numeric(0)
if (m == 1){
kv= 5
}
if (m == 2){
kv= 11
}
if (m == 3){
kv= 50
}
for(i in 1:k_fold){
set.seed(21)
knn_train_pred <- knn(train = dplyr::select(mvmt_all_train_nprox_chunk[chunk != i, ], -label, -chunk),
test = dplyr::select(mvmt_all_train_nprox, -label),
cl = mvmt_all_train_nprox_chunk[chunk != i, ]$label, k=kv)
knn_test_pred <- knn(train = dplyr::select(mvmt_all_train_nprox_chunk[chunk != i, ], -label, -chunk),
test = dplyr::select(mvmt_all_test_nprox, -label),
cl = mvmt_all_train_nprox_chunk[chunk != i, ]$label, k=kv)
knn_train_cva <- rbind(knn_train_cva, Accuracy(knn_train_pred, mvmt_all_train_nprox_chunk$label))
knn_test_cva <- rbind(knn_test_cva, Accuracy(knn_test_pred, mvmt_all_test_nprox$label))
}
#print(knn_train_cva)
#print(knn_test_cva)
if (m == 1){
knn_train_nprox_pred1 <- knn_train_pred
knn_test_nprox_pred1 <- knn_test_pred
knn_train_nprox_cva1 <- knn_train_cva
knn_test_nprox_cva1 <- knn_test_cva
knn_train_nprox_cva_mean1<-mean(knn_train_cva)
knn_test_nprox_cva_mean1<-mean(knn_test_cva)
}
if (m == 2){
knn_train_nprox_pred2 <- knn_train_pred
knn_test_nprox_pred2 <- knn_test_pred
knn_train_nprox_cva2 <- knn_train_cva
knn_test_nprox_cva2 <- knn_test_cva
knn_train_nprox_cva_mean2<-mean(knn_train_cva)
knn_test_nprox_cva_mean2<-mean(knn_test_cva)
}
if (m == 3){
knn_train_nprox_pred3 <- knn_train_pred
knn_test_nprox_pred3 <- knn_test_pred
knn_train_nprox_cva3 <- knn_train_cva
knn_test_nprox_cva3 <- knn_test_cva
knn_train_nprox_cva_mean3<-mean(knn_train_cva)
knn_test_nprox_cva_mean3<-mean(knn_test_cva)
}
}
#plot(mvmt_all$label)
# Create dataframe comparison of the tested models
knn_tests <- c(1, 2, 3)
knn_train_nprox_cva_mean <- c(round(knn_train_nprox_cva_mean1, 2), round(knn_train_nprox_cva_mean2,2), round(knn_train_nprox_cva_mean3,2))
knn_test_nprox_cva_mean <- c(round(knn_test_nprox_cva_mean1,2), round(knn_test_nprox_cva_mean2,2), round(knn_test_nprox_cva_mean3,2))
res_model_nprox_knn <- data.frame(model_no = knn_tests, mean_train = knn_train_nprox_cva_mean, mean_test = knn_test_nprox_cva_mean)
knitr::kable(res_model_nprox_knn, caption = "Knn - no proximity attributes - models Accuracy on train and test data")
model_no | mean_train | mean_test |
---|---|---|
1 | 0.80 | 0.71 |
2 | 0.76 | 0.71 |
3 | 0.70 | 0.67 |
# Confusion matrix
cm_knn_nprox <- caret::confusionMatrix(knn_test_nprox_pred1, mvmt_all_test$label)
print(cm_knn_nprox)
## Confusion Matrix and Statistics
##
## Reference
## Prediction bike boat bus cable_car car kick-scooter plane run ski ski_lift
## bike 28 1 12 3 3 1 0 0 11 4
## boat 1 0 1 0 0 0 0 0 0 0
## bus 6 2 56 0 14 0 0 0 9 0
## cable_car 2 0 0 1 0 0 0 0 0 2
## car 0 0 15 0 161 0 0 0 5 0
## kick-scooter 0 0 0 0 0 0 0 0 0 0
## plane 0 0 0 0 0 0 10 0 0 0
## run 0 0 0 0 0 0 0 1 0 0
## ski 1 0 11 0 3 0 0 0 2 0
## ski_lift 7 0 0 1 0 0 0 0 1 20
## t_bar 0 2 0 0 0 2 0 0 0 2
## train 0 0 1 0 30 1 2 0 1 0
## tram 1 0 20 0 3 1 0 1 8 0
## walk 3 0 2 1 0 0 0 3 1 0
## Reference
## Prediction t_bar train tram walk
## bike 0 0 5 1
## boat 0 0 1 0
## bus 0 2 20 2
## cable_car 0 0 0 0
## car 0 58 8 1
## kick-scooter 0 0 0 0
## plane 0 2 0 0
## run 0 0 0 2
## ski 0 0 3 0
## ski_lift 1 0 2 0
## t_bar 2 0 0 0
## train 0 58 1 1
## tram 0 1 90 1
## walk 1 2 7 317
##
## Overall Statistics
##
## Accuracy : 0.6978
## 95% CI : (0.6693, 0.7253)
## No Information Rate : 0.304
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6299
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: bike Class: boat Class: bus Class: cable_car
## Sensitivity 0.57143 0.000000 0.47458 0.1666667
## Specificity 0.95980 0.997180 0.94217 0.9962371
## Pos Pred Value 0.40580 0.000000 0.50450 0.2000000
## Neg Pred Value 0.97900 0.995310 0.93528 0.9953008
## Prevalence 0.04584 0.004677 0.11038 0.0056127
## Detection Rate 0.02619 0.000000 0.05239 0.0009355
## Detection Prevalence 0.06455 0.002806 0.10384 0.0046773
## Balanced Accuracy 0.76562 0.498590 0.70837 0.5814519
## Class: car Class: kick-scooter Class: plane Class: run
## Sensitivity 0.7523 0.000000 0.833333 0.2000000
## Specificity 0.8982 1.000000 0.998108 0.9981203
## Pos Pred Value 0.6492 NaN 0.833333 0.3333333
## Neg Pred Value 0.9354 0.995323 0.998108 0.9962477
## Prevalence 0.2002 0.004677 0.011225 0.0046773
## Detection Rate 0.1506 0.000000 0.009355 0.0009355
## Detection Prevalence 0.2320 0.000000 0.011225 0.0028064
## Balanced Accuracy 0.8253 0.500000 0.915721 0.5990602
## Class: ski Class: ski_lift Class: t_bar Class: train
## Sensitivity 0.052632 0.71429 0.500000 0.47154
## Specificity 0.982541 0.98847 0.994366 0.96089
## Pos Pred Value 0.100000 0.62500 0.250000 0.61053
## Neg Pred Value 0.965682 0.99229 0.998115 0.93326
## Prevalence 0.035547 0.02619 0.003742 0.11506
## Detection Rate 0.001871 0.01871 0.001871 0.05426
## Detection Prevalence 0.018709 0.02993 0.007484 0.08887
## Balanced Accuracy 0.517586 0.85138 0.747183 0.71622
## Class: tram Class: walk
## Sensitivity 0.65693 0.9754
## Specificity 0.96137 0.9731
## Pos Pred Value 0.71429 0.9407
## Neg Pred Value 0.95016 0.9891
## Prevalence 0.12816 0.3040
## Detection Rate 0.08419 0.2965
## Detection Prevalence 0.11787 0.3152
## Balanced Accuracy 0.80915 0.9743
Random Forest is a multiple decision tree algorithm.
The main parameters is the number of trees. Three sets of value will be tested.
# Build 3 models with different parameter sets
#20, 40, 80
#40, 60, 80
#80, 100, 120
#80, 90, 100
#75, 80, 85
for(m in 1:3){ # Loop through the different models
if (m == 1){
ntree_value= 75}
if (m == 2){
ntree_value= 80}
if (m == 3){
ntree_value= 85}
rf_train_cva <- numeric(0)
rf_test_cva <- numeric(0)
for(i in 1:k_fold){
set.seed(21)
rf <- randomForest(x = dplyr::select(mvmt_all_train_chunk[chunk != i, ], -label, -chunk),
y = mvmt_all_train_chunk[chunk != i, ]$label,
importance=TRUE, proximity=TRUE, ntree = ntree_value)
rf_train_pred <- predict(rf, dplyr::select(mvmt_all_train, -label), type="class")
rf_test_pred <- predict(rf, dplyr::select(mvmt_all_test, -label), type="class")
rf_train_cva <- rbind(rf_train_cva, Accuracy(rf_train_pred, mvmt_all_train$label))
rf_test_cva <- rbind(rf_test_cva, Accuracy(rf_test_pred, mvmt_all_test$label))
#importance(rf)
#varImpPlot(rf)
}
#print(rf_train_cva)
#print(rf_test_cva)
# Summary recall, precision, F1
#https://blog.revolutionanalytics.com/2016/03/com_class_eval_metrics_r.html#perclass
cm_rf_simple <- ConfusionMatrix(rf_test_pred, mvmt_all_test$label)
n = sum(cm_rf_simple) # n. instances
nc = nrow(cm_rf_simple) # n. classes
diag = diag(cm_rf_simple) # n. correctly classified instances per class
rowsums = apply(cm_rf_simple, 1, sum) # n. instances per class
colsums = apply(cm_rf_simple, 2, sum) # n. predictions per class
p = rowsums / n # distribution of instances over the actual classes
q = colsums / n # distribution of instances over the predicted classes
precision = diag / colsums
recall = diag / rowsums
f1 = 2 * precision * recall / (precision + recall)
print(paste("n_trees: ", ntree_value))
df <- data.frame(precision, recall, f1) %>% as.data.frame()
df %>% replace(is.na(df), 0) %>% summary() %>% print()
# Save prediction and trains
if (m == 1){
rf1 <- rf
rf_train_pred1 <- rf_train_pred
rf_test_pred1 <- rf_test_pred
rf_train_cva1 <- rf_train_cva
rf_test_cva1 <- rf_test_cva
rf_train_cva_mean1<-mean(rf_train_cva)
rf_test_cva_mean1<-mean(rf_test_cva)
}
if (m == 2){
rf2 <- rf
rf_train_pred2 <- rf_train_pred
rf_test_pred2 <- rf_test_pred
rf_train_cva2 <- rf_train_cva
rf_test_cva2 <- rf_test_cva
rf_train_cva_mean2<-mean(rf_train_cva)
rf_test_cva_mean2<-mean(rf_test_cva)
}
if (m == 3){
rf3 <- rf
rf_train_pred3 <- rf_train_pred
rf_test_pred3 <- rf_test_pred
rf_train_cva3 <- rf_train_cva
rf_test_cva3 <- rf_test_cva
rf_train_cva_mean3<-mean(rf_train_cva)
rf_test_cva_mean3<-mean(rf_test_cva)
}}
## [1] "n_trees: 75"
## precision recall f1
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.8220 1st Qu.:0.8166 1st Qu.:0.8026
## Median :0.9336 Median :0.9299 Median :0.9151
## Mean :0.7893 Mean :0.7540 Mean :0.7494
## 3rd Qu.:0.9596 3rd Qu.:0.9847 3rd Qu.:0.9629
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## [1] "n_trees: 80"
## precision recall f1
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.8220 1st Qu.:0.8166 1st Qu.:0.8026
## Median :0.9313 Median :0.9299 Median :0.9151
## Mean :0.7885 Mean :0.7532 Mean :0.7486
## 3rd Qu.:0.9593 3rd Qu.:0.9803 3rd Qu.:0.9598
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## [1] "n_trees: 85"
## precision recall f1
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.8220 1st Qu.:0.8218 1st Qu.:0.8064
## Median :0.9368 Median :0.9431 Median :0.9161
## Mean :0.7894 Mean :0.7560 Mean :0.7503
## 3rd Qu.:0.9583 3rd Qu.:0.9824 3rd Qu.:0.9600
## Max. :1.0000 Max. :1.0000 Max. :1.0000
# Create dataframe comparison of the tested models
# rf_tests <- c(1, 2, 3)
# rf_train_cva_mean <- c(round(rf_train_cva_mean1, 3), round(rf_train_cva_mean2,3), round(rf_train_cva_mean3,3))
# rf_test_cva_mean <- c(round(rf_test_cva_mean1,3), round(rf_test_cva_mean2,3), round(rf_test_cva_mean3,3))
# res_model_rf <- data.frame(model_no = rf_tests, mean_train = rf_train_cva_mean, mean_test = rf_test_cva_mean)
# knitr::kable(res_model_rf, caption = "RF - models Accuracy on train and test data")
# Summary accuracy on test and train sets
rf_tests <- c(1, 2, 3)
rf_train_cva_mean <- c(round(rf_train_cva_mean1, 6), round(rf_train_cva_mean2,6), round(rf_train_cva_mean3,6))
rf_test_cva_mean <- c(round(rf_test_cva_mean1,6), round(rf_test_cva_mean2,6), round(rf_test_cva_mean3,6))
res_model_rf <- data.frame(model_no = rf_tests, mean_train = rf_train_cva_mean, mean_test = rf_test_cva_mean)
knitr::kable(res_model_rf, caption = "RF - models Accuracy on train and test data")
model_no | mean_train | mean_test |
---|---|---|
1 | 0.993516 | 0.919551 |
2 | 0.993267 | 0.918803 |
3 | 0.993516 | 0.919925 |
The accuracy of each model for train/test sets tells us that the performances are very similar for the 3 different number of trees. Because the first one has less tree (less overfitting), this model will then be kept for further analysis.
Confusion matrix - RF
We can see RF perform generally better than Knn. There are no obvious confusion of train and car.
cm_rf <- caret::confusionMatrix(rf_test_pred1, mvmt_all_test$label)
print(cm_rf)
## Confusion Matrix and Statistics
##
## Reference
## Prediction bike boat bus cable_car car kick-scooter plane run ski ski_lift
## bike 46 0 0 0 0 1 0 0 0 0
## boat 0 5 1 0 0 0 0 0 0 0
## bus 1 0 93 0 15 2 0 0 0 0
## cable_car 0 0 0 1 0 0 0 0 0 0
## car 0 0 8 0 193 0 0 0 2 0
## kick-scooter 0 0 0 0 0 0 0 0 0 0
## plane 0 0 0 0 0 0 12 0 0 0
## run 0 0 0 0 0 0 0 0 0 0
## ski 0 0 1 0 1 0 0 0 35 1
## ski_lift 0 0 0 5 0 0 0 0 1 27
## t_bar 0 0 0 0 0 0 0 0 0 0
## train 0 0 0 0 5 0 0 0 0 0
## tram 1 0 13 0 0 0 0 0 0 0
## walk 1 0 2 0 0 2 0 5 0 0
## Reference
## Prediction t_bar train tram walk
## bike 0 0 1 0
## boat 0 0 0 0
## bus 0 0 9 2
## cable_car 0 0 0 0
## car 0 1 0 0
## kick-scooter 0 0 0 1
## plane 0 0 0 0
## run 0 0 0 0
## ski 0 0 0 0
## ski_lift 0 0 0 0
## t_bar 4 0 0 0
## train 0 120 0 0
## tram 0 0 125 1
## walk 0 2 2 321
##
## Overall Statistics
##
## Accuracy : 0.9186
## 95% CI : (0.9006, 0.9343)
## No Information Rate : 0.304
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9007
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: bike Class: boat Class: bus Class: cable_car
## Sensitivity 0.93878 1.000000 0.7881 0.1666667
## Specificity 0.99804 0.999060 0.9695 1.0000000
## Pos Pred Value 0.95833 0.833333 0.7623 1.0000000
## Neg Pred Value 0.99706 1.000000 0.9736 0.9953184
## Prevalence 0.04584 0.004677 0.1104 0.0056127
## Detection Rate 0.04303 0.004677 0.0870 0.0009355
## Detection Prevalence 0.04490 0.005613 0.1141 0.0009355
## Balanced Accuracy 0.96841 0.999530 0.8788 0.5833333
## Class: car Class: kick-scooter Class: plane Class: run
## Sensitivity 0.9019 0.0000000 1.00000 0.000000
## Specificity 0.9871 0.9990602 1.00000 1.000000
## Pos Pred Value 0.9461 0.0000000 1.00000 NaN
## Neg Pred Value 0.9757 0.9953184 1.00000 0.995323
## Prevalence 0.2002 0.0046773 0.01123 0.004677
## Detection Rate 0.1805 0.0000000 0.01123 0.000000
## Detection Prevalence 0.1908 0.0009355 0.01123 0.000000
## Balanced Accuracy 0.9445 0.4995301 1.00000 0.500000
## Class: ski Class: ski_lift Class: t_bar Class: train
## Sensitivity 0.92105 0.96429 1.000000 0.9756
## Specificity 0.99709 0.99424 1.000000 0.9947
## Pos Pred Value 0.92105 0.81818 1.000000 0.9600
## Neg Pred Value 0.99709 0.99903 1.000000 0.9968
## Prevalence 0.03555 0.02619 0.003742 0.1151
## Detection Rate 0.03274 0.02526 0.003742 0.1123
## Detection Prevalence 0.03555 0.03087 0.003742 0.1169
## Balanced Accuracy 0.95907 0.97926 1.000000 0.9852
## Class: tram Class: walk
## Sensitivity 0.9124 0.9877
## Specificity 0.9839 0.9812
## Pos Pred Value 0.8929 0.9582
## Neg Pred Value 0.9871 0.9946
## Prevalence 0.1282 0.3040
## Detection Rate 0.1169 0.3003
## Detection Prevalence 0.1310 0.3134
## Balanced Accuracy 0.9482 0.9844
# Models without proximities
# Build 3 models with different parameter sets
for(m in 1:3){ # Loop through the different models
if (m == 1){
ntree_value= 20}
if (m == 2){
ntree_value= 40}
if (m == 3){
ntree_value= 80}
rf_train_cva <- numeric(0)
rf_test_cva <- numeric(0)
for(i in 1:k_fold){
set.seed(21)
rf <- randomForest(x = dplyr::select(mvmt_all_train_nprox_chunk[chunk != i, ], -label, -chunk),
y = mvmt_all_train_nprox_chunk[chunk != i, ]$label,
importance=TRUE, proximity=TRUE, ntree = ntree_value)
rf_train_pred <- predict(rf, dplyr::select(mvmt_all_train, -label), type="class")
rf_test_pred <- predict(rf, dplyr::select(mvmt_all_test, -label), type="class")
rf_train_cva <- rbind(rf_train_cva, Accuracy(rf_train_pred, mvmt_all_train$label))
rf_test_cva <- rbind(rf_test_cva, Accuracy(rf_test_pred, mvmt_all_test$label))
#importance(rf)
#varImpPlot(rf)
}
#print(rf_train_cva)
#print(rf_test_cva)
if (m == 1){
rf_nprox1 <- rf
rf_train_nprox_pred1 <- rf_train_pred
rf_test_nprox_pred1 <- rf_test_pred
rf_train_nprox_cva1 <- rf_train_cva
rf_test_nprox_cva1 <- rf_test_cva
rf_train_nprox_cva_mean1<-mean(rf_train_cva)
rf_test_nprox_cva_mean1<-mean(rf_test_cva)
}
if (m == 2){
rf_nprox2 <- rf
rf_train_nprox_pred2 <- rf_train_pred
rf_test_nprox_pred2 <- rf_test_pred
rf_train_nprox_cva2 <- rf_train_cva
rf_test_nprox_cva2 <- rf_test_cva
rf_train_nprox_cva_mean2<-mean(rf_train_cva)
rf_test_nprox_cva_mean2<-mean(rf_test_cva)
}
if (m == 3){
rf_nprox3 <- rf
rf_train_nprox_pred3 <- rf_train_pred
rf_test_nprox_pred3 <- rf_test_pred
rf_train_nprox_cva3 <- rf_train_cva
rf_test_nprox_cva3 <- rf_test_cva
rf_train_nprox_cva_mean3<-mean(rf_train_cva)
rf_test_nprox_cva_mean3<-mean(rf_test_cva)
}}
# Create dataframe comparison of the tested models
rf_tests <- c(1, 2, 3)
rf_train_nprox_cva_mean <- c(round(rf_train_nprox_cva_mean1, 3), round(rf_train_nprox_cva_mean2,3), round(rf_train_nprox_cva_mean3,3))
rf_test_nprox_cva_mean <- c(round(rf_test_nprox_cva_mean1,3), round(rf_test_nprox_cva_mean2,3), round(rf_test_nprox_cva_mean3,3))
res_model_nprox_rf <- data.frame(model_no = rf_tests, mean_train = rf_train_nprox_cva_mean, mean_test = rf_test_nprox_cva_mean)
knitr::kable(res_model_nprox_rf, caption = "RF - models Accuracy on train and test data")
model_no | mean_train | mean_test |
---|---|---|
1 | 0.984 | 0.853 |
2 | 0.986 | 0.856 |
3 | 0.986 | 0.857 |
Confusion matrix - RF
cm_rf_nprox <- caret::confusionMatrix(rf_test_nprox_pred1, mvmt_all_test$label)
print(cm_rf_nprox)
## Confusion Matrix and Statistics
##
## Reference
## Prediction bike boat bus cable_car car kick-scooter plane run ski ski_lift
## bike 44 0 0 0 0 2 0 0 0 0
## boat 0 4 0 0 0 0 0 0 0 0
## bus 2 0 95 0 13 2 0 0 0 0
## cable_car 0 0 0 2 0 0 0 0 0 0
## car 1 0 7 0 174 0 0 0 2 0
## kick-scooter 0 0 0 0 0 0 0 0 0 0
## plane 0 0 0 0 0 0 12 0 0 0
## run 0 0 0 0 0 0 0 0 0 0
## ski 0 0 2 0 2 0 0 0 34 1
## ski_lift 0 0 0 4 0 0 0 0 2 27
## t_bar 0 0 0 0 0 0 0 0 0 0
## train 0 0 0 0 22 0 0 0 0 0
## tram 2 0 12 0 3 0 0 0 0 0
## walk 0 1 2 0 0 1 0 5 0 0
## Reference
## Prediction t_bar train tram walk
## bike 0 0 3 0
## boat 0 0 0 0
## bus 0 0 21 2
## cable_car 0 0 0 0
## car 0 33 2 0
## kick-scooter 0 0 0 1
## plane 0 0 0 0
## run 0 0 0 0
## ski 0 0 0 0
## ski_lift 0 0 0 0
## t_bar 4 0 0 0
## train 0 88 1 0
## tram 0 0 105 1
## walk 0 2 5 321
##
## Overall Statistics
##
## Accuracy : 0.8513
## 95% CI : (0.8285, 0.8721)
## No Information Rate : 0.304
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8182
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: bike Class: boat Class: bus Class: cable_car
## Sensitivity 0.89796 0.800000 0.80508 0.333333
## Specificity 0.99510 1.000000 0.95794 1.000000
## Pos Pred Value 0.89796 1.000000 0.70370 1.000000
## Neg Pred Value 0.99510 0.999061 0.97537 0.996251
## Prevalence 0.04584 0.004677 0.11038 0.005613
## Detection Rate 0.04116 0.003742 0.08887 0.001871
## Detection Prevalence 0.04584 0.003742 0.12629 0.001871
## Balanced Accuracy 0.94653 0.900000 0.88151 0.666667
## Class: car Class: kick-scooter Class: plane Class: run
## Sensitivity 0.8131 0.0000000 1.00000 0.000000
## Specificity 0.9474 0.9990602 1.00000 1.000000
## Pos Pred Value 0.7945 0.0000000 1.00000 NaN
## Neg Pred Value 0.9529 0.9953184 1.00000 0.995323
## Prevalence 0.2002 0.0046773 0.01123 0.004677
## Detection Rate 0.1628 0.0000000 0.01123 0.000000
## Detection Prevalence 0.2049 0.0009355 0.01123 0.000000
## Balanced Accuracy 0.8802 0.4995301 1.00000 0.500000
## Class: ski Class: ski_lift Class: t_bar Class: train
## Sensitivity 0.89474 0.96429 1.000000 0.71545
## Specificity 0.99515 0.99424 1.000000 0.97569
## Pos Pred Value 0.87179 0.81818 1.000000 0.79279
## Neg Pred Value 0.99612 0.99903 1.000000 0.96347
## Prevalence 0.03555 0.02619 0.003742 0.11506
## Detection Rate 0.03181 0.02526 0.003742 0.08232
## Detection Prevalence 0.03648 0.03087 0.003742 0.10384
## Balanced Accuracy 0.94494 0.97926 1.000000 0.84557
## Class: tram Class: walk
## Sensitivity 0.76642 0.9877
## Specificity 0.98069 0.9785
## Pos Pred Value 0.85366 0.9525
## Neg Pred Value 0.96617 0.9945
## Prevalence 0.12816 0.3040
## Detection Rate 0.09822 0.3003
## Detection Prevalence 0.11506 0.3152
## Balanced Accuracy 0.87356 0.9831
We generated box plots of all transport modes for each trajectory derivative to have a better understanding the how movement characteristics differ between different all transport modes.
In order to show the major distribution of value, for each derivative and each transport mode, we calculated the quantile values corresponding to probabilities of 10% and 90%. And by using the minimum of 10% quantile values and maximum of 90% quantile values as low limit and high limit, we then created the box plot focusing on the major parts of distribution.
Take the box plot of static_percent for example, the groups with the highest static_ _percent are ski, bus, tram and train, which is corresponding to the movement with stops. Take the box plot of space_duration for example, space_duration of plane, train and car have higher distribution than that of other transport modes.
# Boxplot
colourCount = length(unique(mvmt_sta$tmode_manual))
getPalette = colorRampPalette(brewer.pal(9, "Set3"))
# For 28 variables
limtable <- data.frame(matrix(nrow = 30, ncol = 2))
rownames(limtable) <- colnames(mvmt_sta)
limtable <- tail(limtable, -2)
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(duration_secs, 0.1)[[1]],
highlim = quantile(duration_secs, 0.9)[[1]])
limtable[1,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(static_percent, 0.1)[[1]],
highlim = quantile(static_percent, 0.9)[[1]])
limtable[2,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(space_duration, 0.1)[[1]],
highlim = quantile(space_duration, 0.9)[[1]])
limtable[3,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(stepMean_midw_mean, 0.1)[[1]],
highlim = quantile(stepMean_midw_mean, 0.9)[[1]])
limtable[4,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(steplength_mean, 0.1)[[1]],
highlim = quantile(steplength_mean, 0.9)[[1]])
limtable[5,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(steplength_sd, 0.1)[[1]],
highlim = quantile(steplength_sd, 0.9)[[1]])
limtable[6,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(speed_h_mean, 0.1)[[1]],
highlim = quantile(speed_h_mean, 0.9)[[1]])
limtable[7,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(speed_h_sd, 0.1)[[1]],
highlim = quantile(speed_h_sd, 0.9)[[1]])
limtable[8,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(speed_h_range, 0.1)[[1]],
highlim = quantile(speed_h_range, 0.9)[[1]])
limtable[9,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(speed_v_mean, 0.1)[[1]],
highlim = quantile(speed_v_mean, 0.9)[[1]])
limtable[10,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(speed_v_sd, 0.1)[[1]],
highlim = quantile(speed_v_sd, 0.9)[[1]])
limtable[11,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(speed_v_range, 0.1)[[1]],
highlim = quantile(speed_v_range, 0.9)[[1]])
limtable[12,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(a_h_mean, 0.1)[[1]],
highlim = quantile(a_h_mean, 0.9)[[1]])
limtable[13,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(a_h_sd, 0.1)[[1]],
highlim = quantile(a_h_sd, 0.9)[[1]])
limtable[14,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(a_h_range, 0.1)[[1]],
highlim = quantile(a_h_range, 0.9)[[1]])
limtable[15,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(a_v_mean, 0.1)[[1]],
highlim = quantile(a_v_mean, 0.9)[[1]])
limtable[16,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(a_v_sd, 0.1)[[1]],
highlim = quantile(a_v_sd, 0.9)[[1]])
limtable[17,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(a_v_range, 0.1)[[1]],
highlim = quantile(a_v_range, 0.9)[[1]])
limtable[18,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(ta_mean, 0.1)[[1]],
highlim = quantile(ta_mean, 0.9)[[1]])
limtable[19,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(ta_sd, 0.1)[[1]],
highlim = quantile(ta_sd, 0.9)[[1]])
limtable[20,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(ta_range, 0.1)[[1]],
highlim = quantile(ta_range, 0.9)[[1]])
limtable[21,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(isClosetoLakes, 0.1)[[1]],
highlim = quantile(isClosetoLakes, 0.9)[[1]])
limtable[22,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(isClosetoRails, 0.1)[[1]],
highlim = quantile(isClosetoRails, 0.9)[[1]])
limtable[23,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(isClosetoBus, 0.1)[[1]],
highlim = quantile(isClosetoBus, 0.9)[[1]])
limtable[24,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(isClosetoTrams, 0.1)[[1]],
highlim = quantile(isClosetoTrams, 0.9)[[1]])
limtable[25,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(isClosetoHighways, 0.1)[[1]],
highlim = quantile(isClosetoHighways, 0.9)[[1]])
limtable[26,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(isClosetoCables, 0.1)[[1]],
highlim = quantile(isClosetoCables, 0.9)[[1]])
limtable[27,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(isHighAltitude, 0.1)[[1]],
highlim = quantile(isHighAltitude, 0.9)[[1]])
limtable[28,] = c(min(lim0$lowlim), max(lim0$highlim))
q <- length(mvmt_sta)
for(i in 3:q){
pl <- ggplot(mvmt_sta, aes(x = tmode_manual, y = mvmt_sta[,i],
fill=tmode_manual)) +
geom_boxplot(alpha=0.7) + #, outlier.shape = NA) +
theme(legend.position="none", plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = getPalette(colourCount)) +
ggtitle(paste("Box plot of ", colnames(mvmt_sta)[i])) +
ylab(colnames(mvmt_sta)[i]) +
# coord_cartesian(ylim = c(0, 20))
coord_flip(ylim = c(limtable[i-2,1], limtable[i-2,2])*1.1)
print(pl)
}
It is interesting to analyse the relations between the features. There are some obvious correlations: speed, acceleration and their derivatives, or high altitude correlate with closeness to cables (cable car & ski-lift & t-bar). What is also interesting to notice is the correlation between the proximity to trams and bus (0.4).
# correlation matrix
corr <- mvmt_all %>% dplyr::select(-c("segment_ID_final", "label")) %>% cor(method = "pearson") %>% round(1)
ggcorrplot(corr, hc.order = FALSE, type = "lower", lab = FALSE)
Summary of the accuracy and kappas of the 4 considered models
The model with the best accuracy and kapp is RF. Removing the environment annotations did not change a lot for Knn but we can see it had an impact on RF.
models <- c("Knn", "Knn no environment", "RF", "RF no environment")
data.frame(models = models,
accuracy = c(round(cm_knn$overall[1],3), round(cm_knn_nprox$overall[1],3), round(cm_rf$overall[1],3), round(cm_rf_nprox$overall[1],3)),
kappa = c(round(cm_knn$overall[2],3), round(cm_knn_nprox$overall[2],3), round(cm_rf$overall[2],3), round(cm_rf_nprox$overall[2],3))
)
## models accuracy kappa
## 1 Knn 0.689 0.620
## 2 Knn no environment 0.698 0.630
## 3 RF 0.919 0.901
## 4 RF no environment 0.851 0.818
Summary of the Precision, Recall and F1
Rf also has better precision, recall and F1 values than Knn.
# Extract precision, recall and F1 from confusion matrix
cm2metrics <- function(cm) {
n = sum(cm) # number of instances
nc = nrow(cm) # number of classes
diag = diag(cm) # number of correctly classified instances per class
rowsums = apply(cm, 1, sum) # number of instances per class
colsums = apply(cm, 2, sum) # number of predictions per class
p = rowsums / n # distribution of instances over the actual classes
q = colsums / n # distribution of instances over the predicted classes
precision = diag / colsums
recall = diag / rowsums
f1 = 2 * precision * recall / (precision + recall)
df <- data.frame(precision, recall, f1) %>% as.data.frame()
df <- df %>% replace(is.na(df), 0)
return(df)
}
# Knn
cm_knn_simple <- ConfusionMatrix(knn_test_pred1, mvmt_all_test$label)
df_knn <- cm2metrics(cm_knn_simple)
# Knn no proximities
cm_knn_np_simple <- ConfusionMatrix(knn_test_nprox_pred1, mvmt_all_test$label)
df_knn_np <- cm2metrics(cm_knn_np_simple)
# RF
cm_rf_simple <- ConfusionMatrix(rf_test_pred1, mvmt_all_test$label)
df_rf <- cm2metrics(cm_rf_simple)
# RF no proximities
cm_rf_np_simple <- ConfusionMatrix(rf_test_nprox_pred1, mvmt_all_test$label)
df_rf_np <- cm2metrics(cm_rf_np_simple)
# Plot result
models <- c("Knn", "Knn no environment", "RF", "RF no environment")
data.frame(models = models,
precision = c(round(mean(df_knn$precision),3), round(mean(df_knn_np$precision),3), round(mean(df_rf_np$precision),3), round(mean(df_rf_np$precision),3)),
recall = c(round(mean(df_knn$recall),3), round(mean(df_knn_np$recall),3), round(mean(df_rf_np$recall),3), round(mean(df_rf_np$recall),3)),
F1 = c(round(mean(df_knn$f1),3), round(mean(df_knn_np$f1),3), round(mean(df_rf_np$f1),3), round(mean(df_rf_np$f1),3))
)
## models precision recall F1
## 1 Knn 0.535 0.473 0.481
## 2 Knn no environment 0.440 0.455 0.441
## 3 RF 0.763 0.713 0.724
## 4 RF no environment 0.763 0.713 0.724
Confusion matrix - Knn
# Knn
print(ConfusionMatrix(knn_test_pred1, mvmt_all_test$label))
## y_pred
## y_true bike boat bus cable_car car kick-scooter plane run ski ski_lift
## bike 36 1 1 1 0 0 0 0 3 4
## boat 0 1 2 0 0 0 0 0 0 0
## bus 6 1 50 0 12 0 0 0 16 1
## cable_car 1 0 2 2 0 0 0 0 0 1
## car 2 0 18 0 146 0 0 0 2 0
## kick-scooter 1 0 0 0 1 0 0 0 1 0
## plane 0 0 0 0 0 0 10 0 0 0
## run 0 0 0 0 0 0 0 1 1 0
## ski 10 0 10 0 7 0 0 0 5 1
## ski_lift 2 0 1 4 0 1 0 0 0 20
## t_bar 1 0 0 0 0 0 0 0 0 1
## train 0 0 3 0 52 0 2 0 0 0
## tram 7 1 23 0 6 1 0 0 4 1
## walk 2 1 2 0 2 0 0 1 0 0
## y_pred
## y_true t_bar train tram walk
## bike 0 0 1 2
## boat 0 1 1 0
## bus 0 0 26 6
## cable_car 0 0 0 0
## car 0 37 9 0
## kick-scooter 0 0 2 0
## plane 0 2 0 0
## run 0 0 0 3
## ski 0 0 4 1
## ski_lift 0 0 0 0
## t_bar 1 0 1 0
## train 0 63 0 3
## tram 0 0 87 7
## walk 0 0 2 315
Confusion matrix - Knn no proximities
# Knn no prox
print(ConfusionMatrix(knn_test_nprox_pred1, mvmt_all_test$label))
## y_pred
## y_true bike boat bus cable_car car kick-scooter plane run ski ski_lift
## bike 28 1 6 2 0 0 0 0 1 7
## boat 1 0 2 0 0 0 0 0 0 0
## bus 12 1 56 0 15 0 0 0 11 0
## cable_car 3 0 0 1 0 0 0 0 0 1
## car 3 0 14 0 161 0 0 0 3 0
## kick-scooter 1 0 0 0 0 0 0 0 0 0
## plane 0 0 0 0 0 0 10 0 0 0
## run 0 0 0 0 0 0 0 1 0 0
## ski 11 0 9 0 5 0 0 0 2 1
## ski_lift 4 0 0 2 0 0 0 0 0 20
## t_bar 0 0 0 0 0 0 0 0 0 1
## train 0 0 2 0 58 0 2 0 0 0
## tram 5 1 20 0 8 0 0 0 3 2
## walk 1 0 2 0 1 0 0 2 0 0
## y_pred
## y_true t_bar train tram walk
## bike 0 0 1 3
## boat 2 0 0 0
## bus 0 1 20 2
## cable_car 0 0 0 1
## car 0 30 3 0
## kick-scooter 2 1 1 0
## plane 0 2 0 0
## run 0 0 1 3
## ski 0 1 8 1
## ski_lift 2 0 0 0
## t_bar 2 0 0 1
## train 0 58 1 2
## tram 0 1 90 7
## walk 0 1 1 317
Confusion matrix - RF
# rf
print(ConfusionMatrix(rf_test_pred1, mvmt_all_test$label))
## y_pred
## y_true bike boat bus cable_car car kick-scooter plane run ski ski_lift
## bike 46 0 1 0 0 0 0 0 0 0
## boat 0 5 0 0 0 0 0 0 0 0
## bus 0 1 93 0 8 0 0 0 1 0
## cable_car 0 0 0 1 0 0 0 0 0 5
## car 0 0 15 0 193 0 0 0 1 0
## kick-scooter 1 0 2 0 0 0 0 0 0 0
## plane 0 0 0 0 0 0 12 0 0 0
## run 0 0 0 0 0 0 0 0 0 0
## ski 0 0 0 0 2 0 0 0 35 1
## ski_lift 0 0 0 0 0 0 0 0 1 27
## t_bar 0 0 0 0 0 0 0 0 0 0
## train 0 0 0 0 1 0 0 0 0 0
## tram 1 0 9 0 0 0 0 0 0 0
## walk 0 0 2 0 0 1 0 0 0 0
## y_pred
## y_true t_bar train tram walk
## bike 0 0 1 1
## boat 0 0 0 0
## bus 0 0 13 2
## cable_car 0 0 0 0
## car 0 5 0 0
## kick-scooter 0 0 0 2
## plane 0 0 0 0
## run 0 0 0 5
## ski 0 0 0 0
## ski_lift 0 0 0 0
## t_bar 4 0 0 0
## train 0 120 0 2
## tram 0 0 125 2
## walk 0 0 1 321
Confusion matrix - RF no proximities
# rf no prox
print(ConfusionMatrix(rf_test_nprox_pred1, mvmt_all_test$label))
## y_pred
## y_true bike boat bus cable_car car kick-scooter plane run ski ski_lift
## bike 44 0 2 0 1 0 0 0 0 0
## boat 0 4 0 0 0 0 0 0 0 0
## bus 0 0 95 0 7 0 0 0 2 0
## cable_car 0 0 0 2 0 0 0 0 0 4
## car 0 0 13 0 174 0 0 0 2 0
## kick-scooter 2 0 2 0 0 0 0 0 0 0
## plane 0 0 0 0 0 0 12 0 0 0
## run 0 0 0 0 0 0 0 0 0 0
## ski 0 0 0 0 2 0 0 0 34 2
## ski_lift 0 0 0 0 0 0 0 0 1 27
## t_bar 0 0 0 0 0 0 0 0 0 0
## train 0 0 0 0 33 0 0 0 0 0
## tram 3 0 21 0 2 0 0 0 0 0
## walk 0 0 2 0 0 1 0 0 0 0
## y_pred
## y_true t_bar train tram walk
## bike 0 0 2 0
## boat 0 0 0 1
## bus 0 0 12 2
## cable_car 0 0 0 0
## car 0 22 3 0
## kick-scooter 0 0 0 1
## plane 0 0 0 0
## run 0 0 0 5
## ski 0 0 0 0
## ski_lift 0 0 0 0
## t_bar 4 0 0 0
## train 0 88 0 2
## tram 0 1 105 5
## walk 0 0 1 321
For Random Forest we can see the most important features. The meanDecreaseAccuracy indicates the influence of the feature on the performance, and the MeanDecreaseGini the influence on the homogenity.
Importances of features - RF
varImpPlot(rf1)
Importances of features - RF no proximities
varImpPlot(rf_nprox1)
General plots
# Add column predictions
mvmt_all_test_g$predicted_knn <- knn_test_pred1
mvmt_all_test_g$predicted_knn_np <- knn_test_nprox_pred1
mvmt_all_test_g$predicted_rf <- rf_test_pred1
mvmt_all_test_g$predicted_rf_np <- rf_test_nprox_pred1
mvmt_all_test_g <- mvmt_all_test_g %>%
mutate(
correct_knn = predicted_knn == label,
correct_knn_np = predicted_knn_np == label,
correct_rf = predicted_rf == label,
correct_rf_np = predicted_rf_np == label
)
# Reput geometry in the test dataset
#"lst_geometry"
mvmt_all_test_g <- mvmt_all_test_g %>%
merge(
mvmt_seg_summary %>%
dplyr::select(c("segment_ID_final", "geometry")), by = "segment_ID_final"
) %>%
st_as_sf() %>% st_cast("POINT")
coords <- mvmt_all_test_g |> st_coordinates()
mvmt_all_test_g <- mvmt_all_test_g |>
mutate(E = coords[,1], N = coords[,2])
# General Plots
ggplot() +
geom_sf(data = ch_boundaries) +
labs(title = "Knn - Predicted trajectory points") +
geom_path() +
geom_point(data = mvmt_all_test_g, aes(E, N, color = correct_knn), alpha = 0.1) +
coord_sf(datum=2056) +
theme_bw()
ggplot() +
geom_sf(data = ch_boundaries) +
labs(title = "Knn no annotations - Predicted trajectory points") +
geom_path() +
geom_point(data = mvmt_all_test_g, aes(E, N, color = correct_knn_np), alpha = 0.1) +
coord_sf(datum=2056) +
theme_bw()
ggplot() +
geom_sf(data = ch_boundaries) +
labs(title = "RF - Predicted trajectory points") +
#geom_path() +
geom_point(data = mvmt_all_test_g, aes(E, N, color = correct_rf)) +
coord_sf(datum=2056) +
theme_bw()
ggplot() +
geom_sf(data = ch_boundaries) +
labs(title = "RF no annotations - Predicted trajectory points") +
#geom_path() +
geom_point(data = mvmt_all_test_g, aes(E, N, color = correct_rf_np)) +
coord_sf(datum=2056) +
theme_bw()
Comparison on specific regions - Countryside
The plots below show the comparison between Knn and RF in a countryside environment. Transport mode present: car, walk , bus and train. Clicking on the points give you information on the ground truth and predicted value.
We can see that both models fail to identify a section on the top half of the map which was travelled in a bus. Only the part in the village in the North was correctly identified as bus.
We can witness differences in performances between Knn and RF in the south section (train and car). Knn has some confusion between train and car, with or without annotations. Where RF did not have any.
# Visu proximity to environment for specific days
visu_data <- mvmt_all_test_g %>% st_transform(crs=4326) %>%
filter(E > 2537700 & E < 2539620 & N > 1155376 & N < 1167736) %>%
st_transform(crs=4326)
if (visu_data$correct_knn %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_knn))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_knn))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_knn
),
color = ~pal(as.numeric(correct_knn))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "False"),
title = 'Countryside: prediction Knn')
## Plot the leaflet object m
m
if (visu_data$correct_knn_np %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_knn_np))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_knn_np))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_knn_np
),
color = ~pal(as.numeric(correct_knn_np))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "false"),
title = 'Countryside: prediction Knn no annotations')
## Plot the leaflet object m
m
if (visu_data$correct_rf %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_rf))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_rf))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_rf
),
color = ~pal(as.numeric(correct_rf))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "false"),
title = 'Countryside: prediction RF')
## Plot the leaflet object m
m
if (visu_data$correct_rf_np %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_rf_np))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_rf_np))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_rf_np
),
color = ~pal(as.numeric(correct_rf_np))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "false"),
title = 'Countryside: prediction RF no annotations')
## Plot the leaflet object m
m
Comparison on specific regions - City - Yverdon-les-Bains
The plots below show the comparison between Knn and RF in a small city environment. Transport mode present: car, walk, bus and train. Clicking on the points give you information on the ground truth and predicted value.
Knn has again issue with detecting train sections (predict as car) with or without annotations. Also for bus with car, in the area just in front of the station.
For RF, we can see the annotation helped avoiding false prediction of car in bus. With the annotation, the prediction was better.
# Visu proximity to environment for specific days
visu_data <- mvmt_all_test_g %>% st_transform(crs=4326) %>%
filter(E > 2537773 & E < 2542333 & N > 1180721 & N < 1182101) %>%
st_transform(crs=4326)
if (visu_data$correct_knn %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_knn))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_knn))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_knn
),
color = ~pal(as.numeric(correct_knn))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "False"),
title = 'Yverdon-les-Bains: prediction Knn')
## Plot the leaflet object m
m
if (visu_data$correct_knn_np %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_knn_np))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_knn_np))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_knn_np
),
color = ~pal(as.numeric(correct_knn_np))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "false"),
title = 'Yverdon-les-Bains: prediction Knn no annotations')
## Plot the leaflet object m
m
if (visu_data$correct_rf %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_rf))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_rf))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_rf
),
color = ~pal(as.numeric(correct_rf))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "false"),
title = 'Yverdon-les-Bains: prediction RF')
## Plot the leaflet object m
m
if (visu_data$correct_rf_np %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_rf_np))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_rf_np))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_rf_np
),
color = ~pal(as.numeric(correct_rf_np))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "false"),
title = 'Yverdon-les-Bains: prediction RF no annotations')
## Plot the leaflet object m
m
Comparison on specific regions - City - Zurich
The plots below show the comparison between Knn and RF in a big city environment. Transport mode present: bike, walk, bus, run and tram Clicking on the points give you information on the ground truth and predicted value.
Again, Knn had more struggle than RF. It could not correctly identified the run, some bike and walk.
For RF, there has been some misclassification of run and walk, but less than Knn. Bike was also confused into bus.
# Visu proximity to environment for specific days
visu_data <- mvmt_all_test_g %>% st_transform(crs=4326) %>%
filter(E > 2682935 & E < 2683929 & N > 1249983 & N < 1250708) %>%
st_transform(crs=4326)
if (visu_data$correct_knn %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_knn))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_knn))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_knn
),
color = ~pal(as.numeric(correct_knn))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "False"),
title = 'Irchel: prediction Knn')
## Plot the leaflet object m
m
if (visu_data$correct_knn_np %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_knn_np))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_knn_np))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_knn_np
),
color = ~pal(as.numeric(correct_knn_np))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "false"),
title = 'Irchel: prediction Knn no annotations')
## Plot the leaflet object m
m
if (visu_data$correct_rf %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_rf))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_rf))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_rf
),
color = ~pal(as.numeric(correct_rf))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "false"),
title = 'Irchel: prediction RF')
## Plot the leaflet object m
m
if (visu_data$correct_rf_np %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_rf_np))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_rf_np))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_rf_np
),
color = ~pal(as.numeric(correct_rf_np))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "false"),
title = 'Irchel: prediction RF no annotations')
## Plot the leaflet object m
m
Do the characteristics of movement trajectories differ
between different travel modes?
Could the characteristics be used to identify travel
mode?
Yes, we applied pairwise t-test on all 28 trajectory derivative between every 2 transport modes in section 2.2. Based on the result, Most trajectory derivatives we generated are significantly different between different transport modes.
How to identify different travel modes with movement trajectories?
By calculating the trajectory derivatives and applying machine learning algorithms, we were able to build a KNN(k-nearest neighbour) model and a RF(random forest) model to identify transport modes using trajectory derivatives. The result of modelling is listed in section 3.2.
How accurate and efficient could different travel modes be identified based on the aforementioned identification model?
We generated confusion matrix and calculated accuracy, precision, recall, F1 score to find the best parameters for machine learning algorithms and to assess the identification model, which could be found in section 2.3 and section 3.2. The identification models perform pretty well overall.
Posmo map matching
We had to manually unlabelized the sections in train, as Posmo
processing map matched the segments. To train our algorithms, we needed
as raw data as possible.
Not enough data in certains categories
It concerns t-bar and kick-scooter for instance.
Manual labelizing
The labelization of our data was a long and tedious process. However, it
was necessary to have correct tmode.
Statics detection
It was a challenge to remove the statics due to the differences of all
transport modes. This should be improved.
Data limitation
Our algorithms are tight to the specific area of study.
A lot of our trajectories are the same and redundant. We mainly took the same tram or bus lines (home to university, home to sport, …).
This means our algorithms have been trained on a specific region or network, which means the algorithm might not work efficiently in other area. However, this is probably only an issue for public transports modes such as bus, trams, boat, ski-lift & t-bar & cable-car and train. Other means such as walk, are not tied to a specific network and differences between regions are not expected.
Typically, in the results, we have seen there was some trouble identifying the countryside bus, because it goes on roads outside of cities with higher speed. It would be interesting to have more countryside bus to see if a difference could be identified but this is probably hard.
Another observation is that proximity to bus are correlated with proximity to tram. This is the case for Zurich, but not for others cities without trams or less.
Our project aims at defining, describing and identifying movement patterns for different travel modes.
Movement data has been collected over a period of approximately two months and a various range of travel modes have been recorded: train, tram, bus, car, walking, cycling, t_bar, cable_car, running, kick scooter, ski, ski-lift, plane and boat. A labeled point data set with travel modes was extracted from the collected raw data by cleaning, filtering. We then generated a trajectory data set by segmentation based on user, recording gaps, transport mode label, static point, and a defined threshold of time duration.
Each trajectory was then characterized by statistics of the following metrics: speed, acceleration, sinuosity (turning angle) and distance. Annotation was done by linking this data set with environmental factors related to the studied travel mode: railway tracks, tram lines, road network, walking path, slope, etc.
The difference of the aforementioned trajectory derivatives between two travel modes was tested then.
We then built a KNN model and RF model using the trajectory derivatives. After evaluating the models by confusion matrix, accuracy, precision, recall, F1 score and understanding them with importance, we could say the models performed well and some trajectory derivatives are more important than others.
Zeng, J., Yu, Y., Chen, Y., Yang, D., Zhang, L., & Wang, D. (2023). Trajectory-as-a-Sequence: A novel travel mode identification framework. Transportation Research Part C: Emerging Technologies, 146, 103957. doi:10.1016/j.trc.2022.103957
Sadeghian, P., Zhao, X., Golshan, A., & Håkansson, J. (2022). A stepwise methodology for transport mode detection in GPS tracking data. Travel Behaviour and Society, 26, 159–167. doi:10.1016/j.tbs.2021.10.004
Data sources for environment datasets: map.geoadmin.ch