gaze 0.1.0
Perform gaze tracking with common webcams.
gaze_point_calculation.cpp
#include "gaze/pipeline_steps/gaze_point_calculation.h"

#include <cmath>
#include <string>

#include "dlib/gui_widgets.h"
#include "opencv2/opencv.hpp"
#include "yaml-cpp/yaml.h"

#include "gaze/util/config.h"
#include "gaze/util/data.h"
#include "gaze/util/dlibcv.h"
#include "gaze/util/pipeline_utils.h"

namespace gaze {

namespace util {

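/**
 * Parses an aspect ratio string into a double.
 *
 * Accepts "width:height" (e.g. "16:9"), "width/height" (e.g. "16/9"),
 * or a plain decimal string (e.g. "1.778") and returns width / height.
 */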
double parse_aspect_ratio(std::string aspect_ratio_string) {
  double aspect_ratio;
  int delim = aspect_ratio_string.find(":");
  if (delim < 0) {
    delim = aspect_ratio_string.find("/");
  }
  if (delim > 0) {
    double width_rate = std::stod(aspect_ratio_string.substr(0, delim));
    double height_rate = std::stod(aspect_ratio_string.substr(delim + 1));
    aspect_ratio = width_rate / height_rate;
  } else {
    aspect_ratio = std::stod(aspect_ratio_string);
  }
  return aspect_ratio;
}

double clamp(double value, double min, double max) {
  if (value < min) {
    return min;
  } else if (value > max) {
    return max;
  }
  return value;
}

}  // namespace util

namespace pipeline {

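/**
 * Reads the step configuration and the meta configuration (camera, screen,
 * and optional target) and falls back to defaults where values are missing:
 * eye ball centers and a six-point 3D face model in meters, a 1/4 inch
 * (0.00635 m) sensor diagonal at 16:9, and a 0.01 m focal length.
 *
 * A rough sketch of the YAML layout this constructor expects, inferred from
 * the keys read below (values are illustrative, not project defaults;
 * camera_matrix and distortion_coefficients rely on the project's cv::Mat
 * YAML conversion):
 *
 *   meta:
 *     camera:
 *       sensor_aspect_ratio: "16:9"
 *       sensor_size: 0.00635
 *       focal_length: 0.01
 *       resolution: {width: 1280, height: 720}
 *       position: {x: 0.16, y: -0.01}
 *       camera_matrix: ...
 *       distortion_coefficients: ...
 *     screen:
 *       measurements: {width: 0.31, height: 0.17}
 *       resolution: {width: 1920, height: 1080}
 */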
GazePointCalculation::GazePointCalculation()
    : eye_ball_centers(2) {
  YAML::Node config = util::get_config(this->number);
  YAML::Node meta_config = util::get_config()["meta"];

  // Step configuration
  this->name = config["name"] ?
      config["name"].as<std::string>() : "GazePoint";

  if (config["eye_ball_centers"]) {
    this->eye_ball_centers =
        config["eye_ball_centers"].as<std::vector<cv::Vec3d>>();
  } else {
    this->eye_ball_centers[0] = cv::Vec3d(-0.029205, 0.0327, -0.0395);
    this->eye_ball_centers[1] = cv::Vec3d(0.029205, 0.0327, -0.0395);
  }

  if (config["model"]) {
    this->model = config["model"].as<std::vector<cv::Vec3d>>();
    this->landmark_indices = config["landmark_indices"].as<std::vector<decltype(util::Data::landmarks.num_parts())>>();  // NOLINT
  } else {
    this->model = {
      {0.0, 0.0, 0.0},
      {0.0, -0.0636, -0.0125},
      {-0.0433, 0.0327, -0.026},
      {0.0433, 0.0327, -0.026},
      {-0.0289, -0.0289, -0.0241},
      {0.0289, -0.0289, -0.0241}
    };
  }

  // Camera configuration
  YAML::Node camera_config = meta_config["camera"];
  double sensor_aspect_ratio = camera_config["sensor_aspect_ratio"] ?
      util::parse_aspect_ratio(
          camera_config["sensor_aspect_ratio"].as<std::string>())
      : 16. / 9.;

  this->set_sensor_size(
      camera_config["sensor_size"] ?
          camera_config["sensor_size"].as<double>() : 0.00635,
      sensor_aspect_ratio);

  this->camera_matrix = camera_config["camera_matrix"].as<cv::Mat>();
  this->distortion_coefficients =
      camera_config["distortion_coefficients"].as<cv::Mat>();

  this->pixel_width = this->sensor_width /
      camera_config["resolution"]["width"].as<double>();
  this->pixel_height = this->sensor_height /
      camera_config["resolution"]["height"].as<double>();

  this->focal_length = camera_config["focal_length"] ?
      camera_config["focal_length"].as<double>() : 0.01;

  this->camera_offset_x = camera_config["position"]["x"].as<double>();
  this->camera_offset_y = camera_config["position"]["y"].as<double>();

  // Screen configuration
  YAML::Node screen_config = meta_config["screen"];
  this->screen_width_m =
      screen_config["measurements"]["width"].as<double>();
  this->screen_height_m =
      screen_config["measurements"]["height"].as<double>();
  this->screen_width_px =
      screen_config["resolution"]["width"].as<int>();
  this->screen_height_px =
      screen_config["resolution"]["height"].as<int>();

  if (meta_config["target"]) {
    this->target_width = meta_config["target"]["width"].as<int>();
    this->target_height = meta_config["target"]["height"].as<int>();
  } else {
    this->target_width = this->screen_width_px;
    this->target_height = this->screen_height_px;
  }
}

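/**
 * Estimates the distance between camera and face with the pinhole model.
 * For two image points whose real-world distance is known (in meters):
 *
 *   distance = (focal_length * expected_model_distance)
 *                / (pixel_distance * pixel_width)
 *
 * where pixel_distance * pixel_width converts the distance measured in the
 * image into meters on the sensor.
 */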
double GazePointCalculation::calculate_distance(
    const dlib::point& p0, const dlib::point& p1,
    double expected_model_distance) {
  double interex_dist_px = dlib::length(p0 - p1);
  double distance = (this->focal_length * expected_model_distance) /
      (interex_dist_px * this->pixel_width);
  return distance;
}

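/**
 * Estimates the affine transformation from image coordinates to the 3D face
 * model by fitting the detected landmarks to the model points with
 * cv::estimateAffine3D (RANSAC).
 */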
cv::Matx34d GazePointCalculation::invertProjection(
    const std::vector<cv::Vec3d>& landmarks) {
  cv::Matx34d transformation;
  cv::Mat inliers;
  cv::estimateAffine3D(landmarks, this->model, transformation, inliers);
  return transformation;
}

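/**
 * Maps 2D image points into model coordinates: the points are undistorted
 * with the camera intrinsics, scaled by the inverse distance, shifted by
 * the negative head translation, and rotated into the model frame. rcam
 * adds a half turn about the up-vector to undo the camera's mirror view.
 */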
std::vector<cv::Vec3d> GazePointCalculation::unprojectPoints(
    const std::vector<cv::Vec2d>& points, const cv::Vec3d& translation,
    const cv::Matx33d& rotation, double distance) {
  // Half turn about up-vector to account for camera mirror image vs. model
  cv::Matx33d rcam;
  rcam << -1, 0, 0,
           0, 1, 0,
           0, 0, -1;
  std::vector<cv::Vec2d> restored_points;
  cv::undistortPoints(points, restored_points,
      this->camera_matrix, this->distortion_coefficients);
  std::vector<cv::Vec3d> unprojectedPoints;
  for (const cv::Vec2d& point : restored_points) {
    cv::Vec3d unprojected(point[0], point[1], 1);
    unprojectedPoints.push_back(
        rcam * rotation * ((unprojected / distance) - translation));
  }
  return unprojectedPoints;
}

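/**
 * Returns the unit vector pointing from the model origin towards the
 * camera: the head pose translation is rotated back into the model frame
 * and negated.
 */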
cv::Vec3d GazePointCalculation::get_model_to_camera_dir(
    const cv::Vec3d& translation, const cv::Matx33d& rotation) {
  return cv::normalize(cv::Vec3d(0, 0, 0) - rotation.t() * translation);
}

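/**
 * Returns the camera position in model coordinates by scaling the
 * model-to-camera direction with the estimated distance.
 */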
cv::Vec3d GazePointCalculation::get_camera_pos(
    const cv::Vec3d& model_to_camera_dir, double distance) {
  return distance * model_to_camera_dir;
}

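/**
 * Computes the screen corners (top-left, top-right, bottom-right,
 * bottom-left) in model coordinates: the screen is laid out in the camera
 * plane using the configured camera offsets and screen measurements, then
 * rotated and translated to the camera position.
 */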
std::vector<cv::Vec3d> GazePointCalculation::get_screen_corners(
    const cv::Vec3d& camera_pos, const cv::Matx33d& rotation) {
  cv::Vec3d screen_tl =
      cv::Vec3d(-this->camera_offset_x, -this->camera_offset_y, 0);
  cv::Vec3d screen_tr = screen_tl + cv::Vec3d(this->screen_width_m, 0, 0);
  cv::Vec3d screen_br = screen_tr + cv::Vec3d(0, this->screen_height_m, 0);
  cv::Vec3d screen_bl = screen_br + cv::Vec3d(-this->screen_width_m, 0, 0);
  auto transform = [&rotation, &camera_pos] (cv::Vec3d in) -> cv::Vec3d {
    return rotation.t() * in + camera_pos;
  };
  return {transform(screen_tl), transform(screen_tr),
          transform(screen_br), transform(screen_bl)};
}

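/**
 * Intersects the gaze ray with the screen plane. With e the eye ball center
 * and a, b, c three screen corners, solving
 *
 *   e + t * (pupil - e) = a + u * (b - a) + v * (c - a)
 *
 * for (t, u, v) yields the intersection; u and v are the normalized screen
 * coordinates. Returns a 3x2 matrix holding the gaze point in the first
 * column and (t, u, v) in the second.
 */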
cv::Matx32d GazePointCalculation::calculate_gaze_point(
    const cv::Vec3d& eye_ball_center, const cv::Vec3d& pupil,
    const cv::Vec3d& screen_a, const cv::Vec3d& screen_b,
    const cv::Vec3d& screen_c) {
  cv::Matx33d raycast;
  cv::Mat components[] = {cv::Mat(eye_ball_center - pupil),
                          cv::Mat(screen_b - screen_a),
                          cv::Mat(screen_c - screen_a)};
  cv::hconcat(components, 3, raycast);
  cv::Vec3d tuv = (raycast.inv() * (eye_ball_center - screen_a));
  cv::Vec3d gaze_point = eye_ball_center + (pupil - eye_ball_center) * tuv[0];

  cv::Matx32d result;
  cv::Mat vecs[] = {cv::Mat(gaze_point), cv::Mat(tuv)};
  cv::hconcat(vecs, 2, result);
  return result;
}

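/**
 * Derives sensor width and height from the diagonal d and the aspect ratio
 * r = width / height: d^2 = w^2 + h^2 = (1 + r^2) * h^2, hence
 * h = d / sqrt(1 + r^2) and w = r * h.
 */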
void GazePointCalculation::set_sensor_size(
    double sensor_diagonal,
    double aspect_ratio) {
  this->sensor_height = std::sqrt(sensor_diagonal * sensor_diagonal
      / (1 + aspect_ratio * aspect_ratio));
  this->sensor_width = aspect_ratio * this->sensor_height;
}

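/**
 * Estimates the gaze point for the current frame: the tracked landmarks are
 * fitted to the 3D model, the pupil centers are projected into model
 * coordinates, the camera distance is estimated from the outer eye corners
 * (landmarks 36 and 45, assumed to be 0.08671 m apart), camera and screen
 * positions are reconstructed, and both eyes' gaze rays are intersected
 * with the screen. The averaged, clamped screen coordinates are scaled to
 * the target resolution.
 */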
void GazePointCalculation::process(util::Data& data) {
  if (data.landmarks.num_parts() < 5) {
    return;
  }

  // Convert landmarks to OpenCV data structures
  std::vector<cv::Vec3d> landmarks;
  for (auto i : this->landmark_indices) {
    cv::Vec3d landmark(data.landmarks.part(i).x(), data.landmarks.part(i).y(),
                       0);
    landmarks.push_back(landmark);
  }

  // Project pupils into model
  cv::Matx34d transformation = this->invertProjection(landmarks);

  std::vector<dlib::chip_details> eyes =
      util::get_eyes_chip_details(data.landmarks);

  for (auto i = decltype(data.pupils.size()){0};
       i < data.pupils.size(); ++i) {
    dlib::point eye = data.centers[i] + eyes[i].rect.tl_corner();
    data.pupils[i] = transformation * cv::Vec4d(eye.x(), eye.y(), 0, 1);
  }

  // Calculate distance
  double distance = this->calculate_distance(
      data.landmarks.part(36), data.landmarks.part(45), 0.08671);
  cv::Matx33d R;
  cv::Rodrigues(data.head_rotation, R);

  cv::Vec3d model_to_camera_dir =
      this->get_model_to_camera_dir(data.head_translation, R);
  cv::Vec3d camera_pos = this->get_camera_pos(model_to_camera_dir, distance);

  std::vector<cv::Vec3d> screen_tl_tr_br_bl =
      this->get_screen_corners(camera_pos, R);

  // Ray cast
  cv::Vec2d screen_coord_coeffs(0, 0);
  for (auto i = decltype(data.gaze_points.size()){0};
       i < data.gaze_points.size(); ++i) {
    cv::Matx32d gaze_point = this->calculate_gaze_point(
        this->eye_ball_centers[i], data.pupils[i],
        screen_tl_tr_br_bl[0], screen_tl_tr_br_bl[1], screen_tl_tr_br_bl[3]);
    data.gaze_points[i] =
        cv::Vec3d(gaze_point(0, 0), gaze_point(1, 0), gaze_point(2, 0));
    screen_coord_coeffs[0] += gaze_point(1, 1);
    screen_coord_coeffs[1] += gaze_point(2, 1);
  }
  screen_coord_coeffs /= 2;
  // Swap left and right
  screen_coord_coeffs[0] = 1 - util::clamp(screen_coord_coeffs[0], 0, 1);
  screen_coord_coeffs[1] = util::clamp(screen_coord_coeffs[1], 0, 1);

  data.estimated_gaze_point = cv::Vec2d(
      this->target_width * screen_coord_coeffs[0],
      this->target_height * screen_coord_coeffs[1]);
}

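/**
 * Draws a debug scene into the 3D widget: model points, pupils, eye ball
 * centers, unprojected landmarks, the camera position, the coordinate axes,
 * the reconstructed screen rectangle, and the gaze rays from the eye ball
 * centers through the pupils.
 */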
void GazePointCalculation::visualize(util::Data& data) {
  // TODO(shoeffner): Add pipeline step to visualize 2D gaze point / area
  this->widget->clear_overlay();
  if (data.landmarks.num_parts() < 5) {
    return;
  }

  // Recalculate distance, etc.
  double distance = this->calculate_distance(
      data.landmarks.part(36), data.landmarks.part(45), 0.08671);
  cv::Matx33d R;
  cv::Rodrigues(data.head_rotation, R);

  // Move landmarks into model coordinates for an intuitive visualization
  // of where the camera plane lies
  std::vector<cv::Vec2d> image_landmarks;
  for (auto i = decltype(data.landmarks.num_parts()){0};
       i < data.landmarks.num_parts(); ++i) {
    image_landmarks.push_back(
        cv::Vec2d(data.landmarks.part(i).x(), data.landmarks.part(i).y()));
  }
  std::vector<cv::Vec3d> world_landmarks = this->unprojectPoints(
      image_landmarks, data.head_translation, R, distance);

  // Find camera direction
  cv::Vec3d camera_dir =
      this->get_model_to_camera_dir(data.head_translation, R);

  cv::Vec3d camera_pos = this->get_camera_pos(camera_dir, distance);

  // Calculate screen corners
  std::vector<cv::Vec3d> screen_corners =
      this->get_screen_corners(camera_pos, R);

  // Helpers to draw points
  std::vector<dlib::perspective_display::overlay_dot> overlay_dots;

  auto add_to_overlay =
      [&overlay_dots] (std::vector<cv::Vec3d> points,
                       dlib::rgb_pixel color) -> void {
        for (auto p : points) {
          overlay_dots.push_back({{p[0], p[1], p[2]}, color});
        }
      };

  add_to_overlay(this->model, {255, 255, 0});
  add_to_overlay(data.pupils, {0, 255, 255});
  add_to_overlay(this->eye_ball_centers, {255, 0, 255});
  add_to_overlay(world_landmarks, {0, 255, 0});

  std::vector<dlib::chip_details> chips =
      util::get_eyes_chip_details(data.landmarks);
  std::vector<dlib::point> pupils_image = {data.centers[0], data.centers[1]};
  for (auto i = decltype(pupils_image.size()){0}; i < pupils_image.size();
       ++i) {
    pupils_image[i] += chips[i].rect.tl_corner();
  }
  std::vector<cv::Vec3d> pupils_world =
      this->unprojectPoints(
          {cv::Vec2d(pupils_image[0].x(), pupils_image[0].y()),
           cv::Vec2d(pupils_image[1].x(), pupils_image[1].y())},
          data.head_translation, R, distance);

  add_to_overlay(pupils_world, {255, 255, 255});

  // Visualize camera position
  cv::Vec3d camera = this->get_camera_pos(camera_dir, distance);
  overlay_dots.push_back({{camera[0], camera[1], camera[2]},
                          dlib::rgb_pixel(255, 255, 255)});

  this->widget->add_overlay(overlay_dots);

  auto to_line = [](const cv::Vec3d& p0, const cv::Vec3d& p1,
                    dlib::rgb_pixel color) ->
      dlib::perspective_display::overlay_line {
    return dlib::perspective_display::overlay_line(
        {p0[0], p0[1], p0[2]}, {p1[0], p1[1], p1[2]}, color);
  };

  // Draw coordinate axes
  cv::Vec3d origin(0, 0, 0);
  cv::Vec3d xdir(.1, 0, 0);
  cv::Vec3d ydir(0, .1, 0);
  cv::Vec3d zdir(0, 0, .1);

  this->widget->add_overlay({
      to_line(origin, xdir, {255, 0, 0}),
      to_line(origin, ydir, {0, 255, 0}),
      to_line(origin, zdir, {0, 0, 255}),
      to_line(origin, origin + camera_dir, {255, 255, 0})});

  this->widget->add_overlay({
      to_line(screen_corners[0], screen_corners[1], {255, 255, 0}),
      to_line(screen_corners[1], screen_corners[2], {255, 255, 0}),
      to_line(screen_corners[2], screen_corners[3], {255, 255, 0}),
      to_line(screen_corners[3], screen_corners[0], {255, 255, 0})});

  this->widget->add_overlay({
      to_line(this->eye_ball_centers[0], data.pupils[0], {0, 255, 255}),
      to_line(this->eye_ball_centers[1], data.pupils[1], {0, 255, 255})});

  this->widget->add_overlay({
      to_line(this->eye_ball_centers[0], data.gaze_points[0], {255, 255, 255}),
      to_line(this->eye_ball_centers[1], data.gaze_points[1],
              {255, 255, 255})});
}

}  // namespace pipeline

}  // namespace gaze