// Copyright (C) 2010 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_EDGE_LIST_GrAPHS_ABSTRACT_Hh_
#ifdef DLIB_EDGE_LIST_GrAPHS_ABSTRACT_Hh_
#include <vector>
#include "../string.h"
#include "sample_pair_abstract.h"
#include "ordered_sample_pair_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename vector_type,
typename distance_function_type,
typename alloc,
typename T
>
void find_percent_shortest_edges_randomly (
const vector_type& samples,
const distance_function_type& dist_funct,
const double percent,
const unsigned long num,
const T& random_seed,
std::vector<sample_pair, alloc>& out
);
/*!
    requires
        - 0 < percent <= 1
          (i.e. percent is a fraction, not a value between 0 and 100)
        - num > 0
        - random_seed must be convertible to a string by dlib::cast_to_string()
        - dist_funct(samples[i], samples[j]) must be a valid expression that evaluates
          to a floating point number
    ensures
        - Finds short edges among randomly sampled pairs of elements of samples.
        - Specifically, this function randomly samples the space of pairs of integers
          between 0 and samples.size()-1 inclusive.  For each of these pairs, (i,j), a
          sample_pair is created as follows:
              sample_pair(i, j, dist_funct(samples[i], samples[j]))
          num such sample_pair objects are generated, duplicates and pairs with
          distance values == infinity are removed, and then the top percent of them
          with the smallest distance are stored into #out.
        - #out.size() <= num*percent
        - contains_duplicate_pairs(#out) == false
        - for all valid i:
            - #out[i].distance() == dist_funct(samples[#out[i].index1()], samples[#out[i].index2()])
            - #out[i].distance() < std::numeric_limits<double>::infinity()
        - random_seed is used to seed the random number generator used by this
          function.
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type,
typename distance_function_type,
typename alloc,
typename T
>
void find_approximate_k_nearest_neighbors (
const vector_type& samples,
const distance_function_type& dist_funct,
const unsigned long k,
const unsigned long num,
const T& random_seed,
std::vector<sample_pair, alloc>& out
);
/*!
    requires
        - k > 0
        - num > 0
        - random_seed must be convertible to a string by dlib::cast_to_string()
        - dist_funct(samples[i], samples[j]) must be a valid expression that evaluates
          to a floating point number
    ensures
        - This function computes an approximate form of k nearest neighbors.  As num
          grows larger the output of this function converges to the output of the
          find_k_nearest_neighbors() function defined below.
        - Specifically, this function randomly samples the space of pairs of integers
          between 0 and samples.size()-1 inclusive.  For each of these pairs, (i,j), a
          sample_pair is created as follows:
              sample_pair(i, j, dist_funct(samples[i], samples[j]))
          num such sample_pair objects are generated and then exact k-nearest-neighbors
          is performed amongst these sample_pairs and the results are stored into #out.
          Note that samples with an infinite distance between them are considered to
          be not connected at all.
        - contains_duplicate_pairs(#out) == false
        - for all valid i:
            - #out[i].distance() == dist_funct(samples[#out[i].index1()], samples[#out[i].index2()])
            - #out[i].distance() < std::numeric_limits<double>::infinity()
        - random_seed is used to seed the random number generator used by this
          function.
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type,
typename distance_function_type,
typename alloc
>
void find_k_nearest_neighbors (
const vector_type& samples,
const distance_function_type& dist_funct,
const unsigned long k,
std::vector<sample_pair, alloc>& out
);
/*!
    requires
        - k > 0
        - dist_funct(samples[i], samples[j]) must be a valid expression that evaluates
          to a floating point number
    ensures
        - #out == a set of sample_pair objects that represent all the k nearest
          neighbors in samples according to the given distance function dist_funct.
          Note that samples with an infinite distance between them are considered to
          be not connected at all.
        - for all valid i:
            - #out[i].distance() == dist_funct(samples[#out[i].index1()], samples[#out[i].index2()])
            - #out[i].distance() < std::numeric_limits<double>::infinity()
        - contains_duplicate_pairs(#out) == false
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type
>
bool contains_duplicate_pairs (
const vector_type& pairs
);
/*!
    requires
        - vector_type == a type with an interface compatible with std::vector and it
          must in turn contain objects with an interface compatible with
          dlib::sample_pair or dlib::ordered_sample_pair.
    ensures
        - if (pairs contains any elements that are equal according to operator==) then
            - returns true
        - else
            - returns false
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type
>
unsigned long max_index_plus_one (
const vector_type& pairs
);
/*!
    requires
        - vector_type == a type with an interface compatible with std::vector and it
          must in turn contain objects with an interface compatible with
          dlib::sample_pair or dlib::ordered_sample_pair.
    ensures
        - Returns one more than the largest index value stored in any element of
          pairs, or 0 if pairs is empty.  That is:
        - if (pairs.size() == 0) then
            - returns 0
        - else
            - returns a number N such that:
                - for all i:  pairs[i].index1() <  N && pairs[i].index2() <  N
                - for some j: pairs[j].index1()+1 == N || pairs[j].index2()+1 == N
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type
>
void remove_long_edges (
vector_type& pairs,
double distance_threshold
);
/*!
    requires
        - vector_type == a type with an interface compatible with std::vector and it
          must in turn contain objects with an interface compatible with
          dlib::sample_pair or dlib::ordered_sample_pair.
    ensures
        - Removes all elements of pairs that have a distance value greater than the
          given threshold (i.e. elements with distance() > distance_threshold).
        - #pairs.size() <= pairs.size()
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type
>
void remove_short_edges (
vector_type& pairs,
double distance_threshold
);
/*!
    requires
        - vector_type == a type with an interface compatible with std::vector and it
          must in turn contain objects with an interface compatible with
          dlib::sample_pair or dlib::ordered_sample_pair.
    ensures
        - Removes all elements of pairs that have a distance value less than the
          given threshold (i.e. elements with distance() < distance_threshold).
        - #pairs.size() <= pairs.size()
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type
>
void remove_percent_longest_edges (
vector_type& pairs,
double percent
);
/*!
    requires
        - 0 <= percent < 1
          (i.e. percent is a fraction, not a value between 0 and 100)
        - vector_type == a type with an interface compatible with std::vector and it
          must in turn contain objects with an interface compatible with
          dlib::sample_pair or dlib::ordered_sample_pair.
    ensures
        - Removes the given percentage of the longest edges in pairs.  I.e. this
          function removes the long edges from pairs.
        - #pairs.size() == (1-percent)*pairs.size()
          NOTE(review): how a non-integer value of (1-percent)*pairs.size() is rounded
          is not specified here -- confirm against the implementation.
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type
>
void remove_percent_shortest_edges (
vector_type& pairs,
double percent
);
/*!
    requires
        - 0 <= percent < 1
          (i.e. percent is a fraction, not a value between 0 and 100)
        - vector_type == a type with an interface compatible with std::vector and it
          must in turn contain objects with an interface compatible with
          dlib::sample_pair or dlib::ordered_sample_pair.
    ensures
        - Removes the given percentage of the shortest edges in pairs.  I.e. this
          function removes the short edges from pairs.
        - #pairs.size() == (1-percent)*pairs.size()
          NOTE(review): how a non-integer value of (1-percent)*pairs.size() is rounded
          is not specified here -- confirm against the implementation.
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type
>
void remove_duplicate_edges (
vector_type& pairs
);
/*!
    requires
        - vector_type == a type with an interface compatible with std::vector and it
          must in turn contain objects with an interface compatible with
          dlib::sample_pair or dlib::ordered_sample_pair.
    ensures
        - Removes any duplicate edges from pairs.  That is, for all elements of pairs,
          A and B, such that A == B, only one of A or B will be in pairs after this
          function terminates.
        - #pairs.size() <= pairs.size()
        - is_ordered_by_index(#pairs) == true
          (i.e. the remaining elements are left in sorted order, so the original
          ordering of pairs is not necessarily preserved)
        - contains_duplicate_pairs(#pairs) == false
!*/
// ----------------------------------------------------------------------------------------
template <
typename vector_type
>
bool is_ordered_by_index (
const vector_type& edges
);
/*!
    requires
        - vector_type == a type with an interface compatible with std::vector and it
          must in turn contain objects with an interface compatible with
          dlib::sample_pair or dlib::ordered_sample_pair.
    ensures
        - returns true if and only if the contents of edges are in sorted order
          according to order_by_index().  That is, we return true if calling
          std::stable_sort(edges.begin(), edges.end(), &order_by_index<T>) would not
          change the ordering of elements of edges.
          (T here presumably denotes the element type of edges.  order_by_index() is
          not declared in this file; presumably it is declared alongside sample_pair
          and ordered_sample_pair -- TODO confirm.)
!*/
// ----------------------------------------------------------------------------------------
template <
typename alloc1,
typename alloc2
>
void find_neighbor_ranges (
const std::vector<ordered_sample_pair,alloc1>& edges,
std::vector<std::pair<unsigned long, unsigned long>,alloc2>& neighbors
);
/*!
    requires
        - is_ordered_by_index(edges) == true
          (i.e. edges is sorted so that all the edges for a particular node are grouped
          together)
    ensures
        - This function takes a graph, represented by its list of edges, and finds the
          ranges that contain the edges for each node in the graph.  In particular,
          #neighbors[i] will tell you which edges correspond to the ith node in the
          graph.
        - #neighbors.size() == max_index_plus_one(edges)
          (i.e. neighbors will have an entry for each node in the graph defined by the
          list of edges)
        - for all valid i:
            - all elements of edges such that their index1() value == i are in the
              half-open range [#neighbors[i].first, #neighbors[i].second).  That is,
              for all k such that #neighbors[i].first <= k < #neighbors[i].second:
                - edges[k].index1() == i.
                - all edges outside this range have an index1() value != i
!*/
// ----------------------------------------------------------------------------------------
template <
typename alloc1,
typename alloc2
>
void convert_unordered_to_ordered (
const std::vector<sample_pair,alloc1>& edges,
std::vector<ordered_sample_pair,alloc2>& out_edges
);
/*!
    ensures
        - interprets edges as defining an undirected graph.
        - This function populates out_edges with a directed graph that represents the
          same graph as the one in edges.  In particular, this means that for all valid
          i we have the following:
            - if (edges[i].index1() != edges[i].index2()) then
                - #out_edges contains two edges corresponding to edges[i].  They
                  represent the two directions of this edge.  The distance value from
                  edges[i] is also copied into the output edges.
            - else
                - #out_edges contains one edge corresponding to edges[i] since this is
                  a self edge.  The distance value from edges[i] is also copied into
                  the output edge.
        - max_index_plus_one(edges) == max_index_plus_one(#out_edges)
          (i.e. both graphs have the same number of nodes)
        - In all but the most trivial cases, we will have is_ordered_by_index(#out_edges) == false
          (so #out_edges generally must be sorted before it is given to a function
          that requires is_ordered_by_index(), such as find_neighbor_ranges())
        - contains_duplicate_pairs(#out_edges) == contains_duplicate_pairs(edges)
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_EDGE_LIST_GrAPHS_ABSTRACT_Hh_