-- SPATIAL DATABASE FOR GPS WILDLIFE TRACKING DATA, F. Urbano and F. Cagnacci (eds.)
-- DOI: 10.1007/978-3-319-03743-1_4, Springer International Publishing Switzerland 2014

-- Code presented in Chapter 08
-- Authors: Ferdinando Urbano, Mathieu Basille, Francesca Cagnacci
-- Version 1.0

-- The code in this book is free. You can copy, modify and distribute non-trivial part of the code 
-- with no restrictions, according to the terms of the Creative Commons CC0 1.0 licence
-- (https://creativecommons.org/publicdomain/zero/1.0/). 
-- Nevertheless, the acknowledgement of the authorship is appreciated.

-- Note: to run this code you need the database developed in the previous chapters (2,3,4,5,6,7).

-- The test data set is available in the Extra Material page of the book (http://extras.springer.com/)


-- You add the new field (the validity code of each GPS position) to the table
ALTER TABLE main.gps_data_animals ADD COLUMN gps_validity_code integer;
-- You create the look up table that stores the validity codes and you link
-- gps_data_animals to it with a foreign key, so that only the codes listed in
-- the look up table can be assigned to a GPS position
CREATE TABLE lu_tables.lu_gps_validity (
  gps_validity_code integer
    CONSTRAINT lu_gps_validity_pkey PRIMARY KEY,
  gps_validity_description varchar
);

COMMENT ON TABLE lu_tables.lu_gps_validity IS
  'Look up table for GPS positions validity codes.';

-- The match type and the referential actions are spelled out explicitly
ALTER TABLE main.gps_data_animals
  ADD CONSTRAINT animals_lu_gps_validity
    FOREIGN KEY (gps_validity_code)
    REFERENCES lu_tables.lu_gps_validity (gps_validity_code)
    MATCH SIMPLE
    ON UPDATE NO ACTION
    ON DELETE NO ACTION;
-- Admitted validity codes.
-- The target columns are named explicitly, so the statement does not depend on
-- the physical column order of the look up table, and all the codes are loaded
-- by a single statement (either every code is inserted or none is).
INSERT INTO lu_tables.lu_gps_validity
  (gps_validity_code, gps_validity_description)
VALUES
  (0, 'Position with no coordinate'),
  (1, 'Valid Position'),
  (2, 'Position with a low degree of reliability'),
  (11, 'Position wrong: out of the study area'),
  (12, 'Position wrong: impossible spike'),
  (13, 'Position wrong: impossible place (e.g. lake or sea)'),
  (21, 'Position wrong: duplicated timestamp');
  
-- You insert a new animal, called 'test'
INSERT INTO main.animals (
  animals_id,
  animals_code,
  name,
  sex,
  age_class_code,
  species_code,
  note)
VALUES (
  6,
  'test',
  'test-ina',
  'm',
  3,
  1,
  'This is a fake animal, used to test outliers detection processes.');

-- You insert a new sensor, called 'GSM_test'
INSERT INTO main.gps_sensors (
  gps_sensors_id,
  gps_sensors_code,
  purchase_date,
  frequency,
  vendor,
  model,
  sim)
VALUES (
  6,
  'GSM_test',
  '2005-01-01',
  1000,
  'TNT',
  'top',
  '+391441414');

-- You insert the time interval during which the test sensor (id 6) was
-- deployed on the test animal (id 6)
INSERT INTO main.gps_sensors_animals (
  animals_id,
  gps_sensors_id,
  start_time,
  end_time,
  notes)
VALUES (
  6,
  6,
  '2005-04-04 08:00:00+02',
  '2005-05-06 02:00:00+02',
  'test deployment');
-- You import the test data set from the .csv file.
-- The file is a semicolon-delimited csv with a header line; its fields are
-- loaded, in this order, into the listed columns of main.gps_data.
-- NOTE(review): the path is read by the database server, so it presumably has
-- to exist on the server machine -- adapt it to your installation.
COPY main.gps_data (
  gps_sensors_code, line_no,
  utc_date, utc_time, lmt_date, lmt_time,
  ecef_x, ecef_y, ecef_z,
  latitude, longitude, height,
  dop, nav, validated, sats_used,
  ch01_sat_id, ch01_sat_cnr,
  ch02_sat_id, ch02_sat_cnr,
  ch03_sat_id, ch03_sat_cnr,
  ch04_sat_id, ch04_sat_cnr,
  ch05_sat_id, ch05_sat_cnr,
  ch06_sat_id, ch06_sat_cnr,
  ch07_sat_id, ch07_sat_cnr,
  ch08_sat_id, ch08_sat_cnr,
  ch09_sat_id, ch09_sat_cnr,
  ch10_sat_id, ch10_sat_cnr,
  ch11_sat_id, ch11_sat_cnr,
  ch12_sat_id, ch12_sat_cnr,
  main_vol, bu_vol, temp,
  easting, northing, remarks)
FROM 'C:\tracking_db\data\sensors_data\GSM_test.csv'
WITH (
  FORMAT csv,
  HEADER,
  DELIMITER ';');

-- You initialise the validity code of every record in a single pass:
-- the records with no coordinates get the code 0, all the others are assumed
-- to be correct (code 1) until one of the following checks says otherwise.
-- The statement has deliberately no WHERE clause: it (re)sets the whole table.
-- Doing it with one UPDATE avoids writing twice the rows with no geometry.
UPDATE main.gps_data_animals
  SET gps_validity_code =
    CASE
      WHEN geom IS NULL THEN 0
      ELSE 1
    END;
  
-- You retrieve the locations with a duplicated timestamp.
-- The derived table lists the (animal, timestamp) pairs shared by more than
-- one record that is still considered valid (code 1); the join then returns
-- every record of gps_data_animals that has one of those pairs, whatever its
-- own validity code.
SELECT
  loc.gps_data_animals_id,
  loc.animals_id,
  loc.acquisition_time
FROM main.gps_data_animals AS loc
INNER JOIN
  (SELECT animals_id, acquisition_time
  FROM main.gps_data_animals
  WHERE gps_validity_code = 1
  GROUP BY animals_id, acquisition_time
  HAVING count(animals_id) > 1) AS dup
  ON dup.animals_id = loc.animals_id
  AND dup.acquisition_time = loc.acquisition_time
ORDER BY
  loc.animals_id,
  loc.acquisition_time;

-- You mark the erroneous locations: every record whose (animal, timestamp)
-- pair is shared by more than one valid record gets the code 21
-- ('Position wrong: duplicated timestamp')
UPDATE main.gps_data_animals
  SET gps_validity_code = 21
  WHERE gps_data_animals_id IN
    (SELECT loc.gps_data_animals_id
    FROM main.gps_data_animals AS loc
    INNER JOIN
      (SELECT animals_id, acquisition_time
      FROM main.gps_data_animals
      WHERE gps_validity_code = 1
      GROUP BY animals_id, acquisition_time
      HAVING count(animals_id) > 1) AS dup
      ON dup.animals_id = loc.animals_id
      AND dup.acquisition_time = loc.acquisition_time);

-- You detect outliers outside the boundaries of the study_area layer:
-- the locations that have a geometry and for which no row of study_area
-- intersects that geometry
SELECT loc.gps_data_animals_id
FROM main.gps_data_animals AS loc
WHERE
  loc.geom IS NOT NULL AND
  NOT EXISTS
    (SELECT 1
    FROM env_data.study_area AS sa
    WHERE ST_Intersects(loc.geom, sa.geom));
  
-- Detection using a buffer: the study area is enlarged by 0.1 and the result
-- is simplified with a tolerance of 0.1 (both in the units of the study_area
-- geometry, presumably degrees -- confirm), then the locations disjoint from
-- it are returned.
-- The buffered area is read through a scalar sub-select, so the query expects
-- study_area to hold a single row.
SELECT
  loc.animals_id,
  loc.gps_data_animals_id
FROM main.gps_data_animals AS loc
WHERE
  ST_Disjoint(
    loc.geom,
    (SELECT ST_Simplify(ST_Buffer(sa.geom, 0.1), 0.1)
    FROM env_data.study_area AS sa));
-- You mark the locations outside the study area: the code 11
-- ('Position wrong: out of the study area') is given to every location that
-- is disjoint from the buffered and simplified study area
UPDATE main.gps_data_animals
  SET gps_validity_code = 11
  WHERE gps_data_animals_id IN
    (SELECT loc.gps_data_animals_id
    FROM main.gps_data_animals AS loc
    CROSS JOIN env_data.study_area AS sa
    WHERE
      ST_Disjoint(
        loc.geom,
        ST_Simplify(ST_Buffer(sa.geom, 0.1), 0.1)));

-- You explore the extreme coordinates of the dataset: the ten smallest and
-- the ten largest values of X, then the same for Y (records with no geometry
-- are skipped)

-- Ten smallest X
SELECT
  gps_data_animals_id,
  ST_X(geom)
FROM main.gps_data_animals
WHERE geom IS NOT NULL
ORDER BY ST_X(geom) ASC
LIMIT 10;

-- Ten largest X
SELECT
  gps_data_animals_id,
  ST_X(geom)
FROM main.gps_data_animals
WHERE geom IS NOT NULL
ORDER BY ST_X(geom) DESC
LIMIT 10;

-- Ten smallest Y
SELECT
  gps_data_animals_id,
  ST_Y(geom)
FROM main.gps_data_animals
WHERE geom IS NOT NULL
ORDER BY ST_Y(geom) ASC
LIMIT 10;

-- Ten largest Y
SELECT
  gps_data_animals_id,
  ST_Y(geom)
FROM main.gps_data_animals
WHERE geom IS NOT NULL
ORDER BY ST_Y(geom) DESC
LIMIT 10;

-- You mark the positions that fall inside a water body.
-- A position still considered valid (code 1) gets the code 13
-- ('Position wrong: impossible place (e.g. lake or sea)') when the land cover
-- raster tile it intersects has, at that position, one of the values 40-44.
-- The position is transformed to SRID 3035 before it is compared with the
-- raster.
UPDATE main.gps_data_animals AS loc
  SET gps_validity_code = 13
  FROM env_data.corine_land_cover AS clc
  WHERE
    loc.gps_validity_code = 1 AND
    ST_Intersects(
      clc.rast,
      ST_Transform(loc.geom, 3035)) AND
    ST_Value(
      clc.rast,
      ST_Transform(loc.geom, 3035)) IN (40, 41, 42, 43, 44) AND
    ST_Value(
      clc.rast,
      ST_Transform(loc.geom, 3035)) <> 'NaN';

-- You visualize the steps of your data sets.
-- For each valid location (code 1) the previous valid location of the same
-- animal is looked up, then the step between the two is described by:
--   acquisition_time_1  timestamp of the previous location
--   deltat              time elapsed since the previous location (seconds)
--   dist                distance from the previous location on the WGS 84
--                       spheroid (meters, per the PostGIS documentation)
--   speed               dist / (deltat + 1) * 3600, i.e. distance per hour;
--                       the "+ 1" second is presumably there to avoid a
--                       division by zero
-- The first location of each animal has no previous one, so its step columns
-- are NULL.
-- The rows are ordered by animal and time before the LIMIT is applied, so the
-- ten rows shown are always the same ones.
-- NOTE(review): ST_Distance_Spheroid is named ST_DistanceSpheroid in recent
-- PostGIS releases; the name used by the rest of this script is kept here.
WITH previous_location AS
  (SELECT
    animals_id,
    acquisition_time,
    geom,
    LAG(acquisition_time, 1) OVER track AS acquisition_time_1,
    LAG(geom, 1) OVER track AS geom_1
  FROM main.gps_data_animals
  WHERE gps_validity_code = 1
  WINDOW track AS (PARTITION BY animals_id ORDER BY acquisition_time)),
step AS
  (SELECT
    animals_id,
    acquisition_time,
    acquisition_time_1,
    EXTRACT(epoch FROM acquisition_time) -
      EXTRACT(epoch FROM acquisition_time_1) AS deltat_s,
    ST_Distance_Spheroid(
      geom,
      geom_1,
      'SPHEROID["WGS 84",6378137,298.257223563]') AS dist_m
  FROM previous_location)
SELECT
  animals_id AS id,
  acquisition_time,
  acquisition_time_1,
  deltat_s::integer AS deltat,
  dist_m::integer AS dist,
  (dist_m / (deltat_s + 1) * 60 * 60)::numeric(8,2) AS speed
FROM step
ORDER BY
  animals_id,
  acquisition_time
LIMIT 10;

-- Detection of impossible and improbable movements.
-- First check: for each valid location (code 1) you compute the distance, on
-- the WGS 84 spheroid, between the location and the point whose coordinates
-- are the average X and the average Y of the locations of the same animal in
-- a moving window (from 10 records before to 10 records after, in time
-- order). The locations farther than 10000 from that average point are
-- returned.
WITH dist_to_neighbourhood AS
  (SELECT
    gps_data_animals_id,
    ST_Distance_Spheroid(
      geom,
      ST_SetSRID(
        ST_MakePoint(
          avg(ST_X(geom)) OVER moving_window,
          avg(ST_Y(geom)) OVER moving_window),
        4326),
      'SPHEROID["WGS 84",6378137,298.257223563]') AS dist_to_avg
  FROM main.gps_data_animals
  WHERE gps_validity_code = 1
  WINDOW moving_window AS
    (PARTITION BY animals_id
    ORDER BY acquisition_time
    ROWS BETWEEN 10 PRECEDING AND 10 FOLLOWING))
SELECT gps_data_animals_id
FROM dist_to_neighbourhood
WHERE dist_to_avg > 10000;

-- Second check: spikes.
-- For each valid location (code 1) you look up the previous and the next
-- valid location of the same animal and you compute:
--   speed_from  spheroid distance from the previous location divided by the
--               seconds elapsed, times 3600 (distance per hour)
--   speed_to    the same towards the next location
--   rel_angle   cosine of the difference between the azimuth of the incoming
--               step and the azimuth of the outgoing step (close to -1 when
--               the animal goes out and comes straight back)
-- A location is returned when both speeds exceed 2500 and rel_angle is
-- below -0.99.
WITH neighbours AS
  (SELECT
    gps_data_animals_id,
    geom,
    EXTRACT(epoch FROM acquisition_time) AS t,
    LAG(geom, 1) OVER track AS geom_prev,
    LEAD(geom, 1) OVER track AS geom_next,
    EXTRACT(epoch FROM (LAG(acquisition_time, 1) OVER track)) AS t_prev,
    EXTRACT(epoch FROM (LEAD(acquisition_time, 1) OVER track)) AS t_next
  FROM main.gps_data_animals
  WHERE gps_validity_code = 1
  WINDOW track AS (PARTITION BY animals_id ORDER BY acquisition_time)),
movement AS
  (SELECT
    gps_data_animals_id,
    ST_Distance_Spheroid(
      geom,
      geom_prev,
      'SPHEROID["WGS 84",6378137,298.257223563]') /
      (t - t_prev) * 3600 AS speed_from,
    ST_Distance_Spheroid(
      geom,
      geom_next,
      'SPHEROID["WGS 84",6378137,298.257223563]') /
      ( - t + t_next) * 3600 AS speed_to,
    cos(
      ST_Azimuth(geom_prev::geography, geom::geography) -
      ST_Azimuth(geom::geography, geom_next::geography)) AS rel_angle
  FROM neighbours)
SELECT gps_data_animals_id
FROM movement
WHERE
  rel_angle < -0.99 AND
  speed_from > 2500 AND
  speed_to > 2500;
-- You mark records with impossible and improbable movements.
-- Impossible spikes get the code 12 ('Position wrong: impossible spike'):
-- valid locations (code 1) reached from the previous valid location and left
-- towards the next one at more than 2500 (spheroid distance per hour), with
-- the cosine of the difference between the two step azimuths below -0.99.
UPDATE main.gps_data_animals
SET gps_validity_code = 12
WHERE gps_data_animals_id IN
  (SELECT movement.gps_data_animals_id
  FROM
    (SELECT
      n.gps_data_animals_id,
      ST_Distance_Spheroid(
        n.geom,
        n.geom_prev,
        'SPHEROID["WGS 84",6378137,298.257223563]') /
        (n.t - n.t_prev) * 3600 AS speed_from,
      ST_Distance_Spheroid(
        n.geom,
        n.geom_next,
        'SPHEROID["WGS 84",6378137,298.257223563]') /
        ( - n.t + n.t_next) * 3600 AS speed_to,
      cos(
        ST_Azimuth(n.geom_prev::geography, n.geom::geography) -
        ST_Azimuth(n.geom::geography, n.geom_next::geography)) AS rel_angle
    FROM
      (SELECT
        gps_data_animals_id,
        geom,
        EXTRACT(epoch FROM acquisition_time) AS t,
        LAG(geom, 1) OVER track AS geom_prev,
        LEAD(geom, 1) OVER track AS geom_next,
        EXTRACT(epoch FROM (LAG(acquisition_time, 1) OVER track)) AS t_prev,
        EXTRACT(epoch FROM (LEAD(acquisition_time, 1) OVER track)) AS t_next
      FROM main.gps_data_animals
      WHERE gps_validity_code = 1
      WINDOW track AS (PARTITION BY animals_id ORDER BY acquisition_time)) AS n
    ) AS movement
  WHERE
    movement.rel_angle < -0.99 AND
    movement.speed_from > 2500 AND
    movement.speed_to > 2500);

-- Improbable spikes get the code 2
-- ('Position with a low degree of reliability'): same test as for the
-- impossible spikes, run on the locations that are still valid (code 1), but
-- with looser thresholds (both speeds above 300 and cosine of the difference
-- between the two step azimuths below -0.98).
UPDATE main.gps_data_animals
SET gps_validity_code = 2
WHERE gps_data_animals_id IN
  (WITH neighbours AS
    (SELECT
      gps_data_animals_id,
      geom,
      EXTRACT(epoch FROM acquisition_time) AS t,
      LAG(geom, 1) OVER track AS geom_prev,
      LEAD(geom, 1) OVER track AS geom_next,
      EXTRACT(epoch FROM (LAG(acquisition_time, 1) OVER track)) AS t_prev,
      EXTRACT(epoch FROM (LEAD(acquisition_time, 1) OVER track)) AS t_next
    FROM main.gps_data_animals
    WHERE gps_validity_code = 1
    WINDOW track AS (PARTITION BY animals_id ORDER BY acquisition_time)),
  movement AS
    (SELECT
      gps_data_animals_id,
      ST_Distance_Spheroid(
        geom,
        geom_prev,
        'SPHEROID["WGS 84",6378137,298.257223563]') /
        (t - t_prev) * 3600 AS speed_from,
      ST_Distance_Spheroid(
        geom,
        geom_next,
        'SPHEROID["WGS 84",6378137,298.257223563]') /
        ( - t + t_next) * 3600 AS speed_to,
      cos(
        ST_Azimuth(geom_prev::geography, geom::geography) -
        ST_Azimuth(geom::geography, geom_next::geography)) AS rel_angle
    FROM neighbours)
  SELECT gps_data_animals_id
  FROM movement
  WHERE
    rel_angle < -0.98 AND
    speed_from > 300 AND
    speed_to > 300);

-- You update spatial views excluding outliers.
-- Convex hull, per animal, of the locations with validity code 1.
CREATE OR REPLACE VIEW analysis.view_convex_hulls AS
SELECT
  loc.animals_id,
  ST_ConvexHull(ST_Collect(loc.geom))::geometry(Polygon,4326) AS geom
FROM main.gps_data_animals AS loc
WHERE loc.gps_validity_code = 1
GROUP BY loc.animals_id
ORDER BY loc.animals_id;

  CREATE OR REPLACE VIEW analysis.view_gps_locations AS
-- Locations with validity code 1, each with the name and sex of its animal,
-- the descriptions of the animal's age class and species, and the acquisition
-- time expressed in UTC.
SELECT
  loc.gps_data_animals_id,
  loc.animals_id,
  ani.name,
  timezone('UTC'::text, loc.acquisition_time) AS time_utc,
  ani.sex,
  age.age_class_description,
  spe.species_description,
  loc.geom
FROM main.gps_data_animals AS loc
INNER JOIN main.animals AS ani
  ON loc.animals_id = ani.animals_id
INNER JOIN lu_tables.lu_age_class AS age
  ON ani.age_class_code = age.age_class_code
INNER JOIN lu_tables.lu_species AS spe
  ON ani.species_code = spe.species_code
WHERE loc.gps_validity_code = 1;

-- Trajectory of each animal: a line that joins, in time order, the locations
-- with validity code 1.
-- The order of the vertices is requested inside the aggregate itself
-- (ORDER BY acquisition_time): an ORDER BY placed in a sub-select is not
-- guaranteed to be honoured by the aggregate that consumes its rows.
CREATE OR REPLACE VIEW analysis.view_trajectories AS
SELECT
  loc.animals_id,
  ST_MakeLine(loc.geom ORDER BY loc.acquisition_time)::geometry(LineString,4326) AS geom
FROM main.gps_data_animals AS loc
WHERE loc.gps_validity_code = 1
GROUP BY loc.animals_id;

-- Update the import procedure with outlier detection.
--
-- Trigger function that completes a row of gps_data_animals before it is
-- returned to the trigger that called it.
--   * longitude and latitude both present: geom is set to the corresponding
--     point (SRID 4326); gps_validity_code is set to 1 when the point lies
--     within 0.1 (units of the geometries, i.e. degrees) of a study_area
--     polygon and to 11 otherwise; the environmental attributes (pro_com,
--     corine_land_cover_code, altitude_srtm, station_id, roads_dist,
--     ndvi_modis) are derived from the ancillary layers.
--   * longitude or latitude missing: only gps_validity_code is set, to 0.
-- Returns the modified NEW row.
--
-- Every look up returns at most one row (LIMIT 1): a point lying exactly on
-- the border between two polygons or two raster tiles would otherwise make
-- the scalar sub-select fail and abort the whole import. When a single row
-- matches, the result is the same as without the LIMIT.
CREATE OR REPLACE FUNCTION tools.new_gps_data_animals()
RETURNS trigger AS
$BODY$
DECLARE
  thegeom geometry;
BEGIN
  IF NEW.longitude IS NOT NULL AND NEW.latitude IS NOT NULL THEN
    thegeom := ST_SetSRID(ST_MakePoint(NEW.longitude, NEW.latitude), 4326);
    NEW.geom := thegeom;

    -- Outlier detection: ST_DWithin tests the distance directly, without
    -- building a buffer polygon around each new point.
    IF EXISTS (
      SELECT 1
      FROM env_data.study_area AS sa
      WHERE ST_DWithin(thegeom, sa.geom, 0.1)) THEN
      NEW.gps_validity_code := 1;
    ELSE
      NEW.gps_validity_code := 11;
    END IF;

    -- Administrative unit that contains the point (lowest code on a border)
    NEW.pro_com := (
      SELECT b.pro_com::integer
      FROM env_data.adm_boundaries AS b
      WHERE ST_Intersects(b.geom, thegeom)
      ORDER BY b.pro_com::integer
      LIMIT 1);

    -- Raster look ups: on a tile border the lowest non-NULL cell value is
    -- taken (NULLs sort last), so the result does not depend on the scan order
    NEW.corine_land_cover_code := (
      SELECT ST_Value(c.rast, ST_Transform(thegeom, 3035)) AS cell_value
      FROM env_data.corine_land_cover AS c
      WHERE ST_Intersects(ST_Transform(thegeom, 3035), c.rast)
      ORDER BY cell_value
      LIMIT 1);

    NEW.altitude_srtm := (
      SELECT ST_Value(d.rast, thegeom) AS cell_value
      FROM env_data.srtm_dem AS d
      WHERE ST_Intersects(thegeom, d.rast)
      ORDER BY cell_value
      LIMIT 1);

    -- Closest meteo station (distance on the WGS 84 spheroid)
    NEW.station_id := (
      SELECT m.station_id::integer
      FROM env_data.meteo_stations AS m
      ORDER BY ST_Distance_Spheroid(
        thegeom,
        m.geom,
        'SPHEROID["WGS 84",6378137,298.257223563]')
      LIMIT 1);

    -- Distance to the closest road (geography distance, cast to integer)
    NEW.roads_dist := (
      SELECT ST_Distance(thegeom::geography, r.geom::geography)::integer
      FROM env_data.roads AS r
      ORDER BY ST_Distance(thegeom::geography, r.geom::geography)
      LIMIT 1);

    -- NDVI image of the same year and month whose acquisition day is the
    -- start of the period that contains the day of the location: 1 for days
    -- 1-10, 11 for days 11-20, 21 for the rest of the month
    NEW.ndvi_modis := (
      SELECT ST_Value(n.rast, thegeom) AS cell_value
      FROM env_data_ts.ndvi_modis AS n
      WHERE
        ST_Intersects(thegeom, n.rast)
        AND EXTRACT(year FROM n.acquisition_date) = EXTRACT(year FROM NEW.acquisition_time)
        AND EXTRACT(month FROM n.acquisition_date) = EXTRACT(month FROM NEW.acquisition_time)
        AND EXTRACT(day FROM n.acquisition_date) =
          CASE
            WHEN EXTRACT(day FROM NEW.acquisition_time) < 11 THEN 1
            WHEN EXTRACT(day FROM NEW.acquisition_time) < 21 THEN 11
            ELSE 21
          END
      ORDER BY cell_value
      LIMIT 1);
  ELSE
    -- No coordinates: nothing can be derived, the record is only tagged
    NEW.gps_validity_code := 0;
  END IF;

  RETURN NEW;
END;$BODY$
LANGUAGE plpgsql VOLATILE
COST 100;
-- Description stored in the database catalog for the trigger function
-- (the garbled phrase "calculated intersection the GPS position" is repaired)
COMMENT ON FUNCTION tools.new_gps_data_animals() 
IS 'When called by the trigger insert_gps_locations (raised whenever a new GPS position is uploaded into gps_data_animals) this function gets the new longitude and latitude values and sets the field geom accordingly, computing a set of derived environmental information calculated by intersecting the GPS position with the environmental ancillary layers. GPS positions outside the study area are tagged as outliers.';