在 C 中实现 8-连通性 Hoshen-Kopelman 算法
Implementing 8-Connectivity Hoshen-Kopelman Algorithm in C
我找到了 here Hoshen-Kopelman 算法的实现,但它只检查左上角的邻居,这意味着对角线连接不被视为连接。
如何改进此代码,使对角线连接也被视为连接?
在下面的示例中,我希望有 1 个对象而不是 7 个对象:
4 5
1 0 1 0 1
0 1 0 1 0
1 0 1 0 0
0 0 1 0 0
--input--
1 0 1 0 1
0 1 0 1 0
1 0 1 0 0
0 0 1 0 0
--output--
1 0 2 0 3
0 4 0 5 0
6 0 7 0 0
0 0 7 0 0
HK reports 7 clusters found
这是实现(可以找到完整代码here):
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
/* Implementation of Union-Find Algorithm */
/* The 'labels' array has the meaning that labels[x] is an alias for the label x; by
following this chain until x == labels[x], you can find the canonical name of an
equivalence class. The labels start at one; labels[0] is a special value indicating
the highest label already used. */
int* labels;
int n_labels = 0; /* length of the labels array */
/* uf_find returns the canonical label for the equivalence class containing x */
int uf_find(int x)
{
int y = x;
while (labels[y] != y)
y = labels[y];
while (labels[x] != x)
{
int z = labels[x];
labels[x] = y;
x = z;
}
return y;
}
/* uf_union joins two equivalence classes and returns the canonical label of the resulting class. */
int uf_union(int x, int y)
{
return labels[uf_find(x)] = uf_find(y);
}
/* uf_make_set creates a new equivalence class and returns its label */
int uf_make_set(void)
{
labels[0] ++;
assert(labels[0] < n_labels);
labels[labels[0]] = labels[0];
return labels[0];
}
/* uf_intitialize sets up the data structures needed by the union-find implementation. */
void uf_initialize(int max_labels)
{
n_labels = max_labels;
labels = calloc(sizeof(int), n_labels);
labels[0] = 0;
}
/* uf_done frees the memory used by the union-find data structures */
void uf_done(void)
{
n_labels = 0;
free(labels);
labels = 0;
}
/* End Union-Find implementation */
#define max(a,b) (a>b?a:b)
#define min(a,b) (a>b?b:a)
/* print_matrix prints out a matrix that is set up in the "pointer to pointers" scheme
(aka, an array of arrays); this is incompatible with C's usual representation of 2D
arrays, but allows for 2D arrays with dimensions determined at run-time */
void print_matrix(int** matrix, int m, int n)
{
for (int i = 0; i < m; i++)
{
for (int j = 0; j < n; j++)
printf("%3d ", matrix[i][j]);
printf("\n");
}
}
/* Label the clusters in "matrix". Return the total number of clusters found. */
int hoshen_kopelman(int** matrix, int m, int n)
{
uf_initialize(m * n / 2);
/* scan the matrix */
for (int y = 0; y < m; y++)
{
for (int x = 0; x < n; x++)
{
if (matrix[y][x])
{ // if occupied ...
int up = (y == 0 ? 0 : matrix[y - 1][x]); // look up
int left = (x == 0 ? 0 : matrix[y][x - 1]); // look left
switch (!!up + !!left)
{
case 0:
matrix[y][x] = uf_make_set(); // a new cluster
break;
case 1: // part of an existing cluster
matrix[y][x] = max(up, left); // whichever is nonzero is labelled
break;
case 2: // this site binds two clusters
matrix[y][x] = uf_union(up, left);
break;
}
}
}
}
/* apply the relabeling to the matrix */
/* This is a little bit sneaky.. we create a mapping from the canonical labels
determined by union/find into a new set of canonical labels, which are
guaranteed to be sequential. */
int* new_labels = calloc(sizeof(int), n_labels); // allocate array, initialized to zero
for (int i = 0; i < m; i++)
for (int j = 0; j < n; j++)
if (matrix[i][j])
{
int x = uf_find(matrix[i][j]);
if (new_labels[x] == 0)
{
new_labels[0]++;
new_labels[x] = new_labels[0];
}
matrix[i][j] = new_labels[x];
}
int total_clusters = new_labels[0];
free(new_labels);
uf_done();
return total_clusters;
}
/* This procedure checks to see that any occupied neighbors of an occupied site
have the same label. */
void check_labelling(int** matrix, int m, int n)
{
int N, S, E, W;
for (int i = 0; i < m; i++)
for (int j = 0; j < n; j++)
if (matrix[i][j])
{
N = (i == 0 ? 0 : matrix[i - 1][j]);
S = (i == m - 1 ? 0 : matrix[i + 1][j]);
E = (j == n - 1 ? 0 : matrix[i][j + 1]);
W = (j == 0 ? 0 : matrix[i][j - 1]);
assert(N == 0 || matrix[i][j] == N);
assert(S == 0 || matrix[i][j] == S);
assert(E == 0 || matrix[i][j] == E);
assert(W == 0 || matrix[i][j] == W);
}
}
/* The sample program reads in a matrix from standard input, runs the HK algorithm on
it, and prints out the results. The form of the input is two integers giving the
dimensions of the matrix, followed by the matrix elements (with data separated by
whitespace).
a sample input file is the following:
8 8
1 1 1 1 1 1 1 1
0 0 0 0 0 0 0 1
1 0 0 0 0 1 0 1
1 0 0 1 0 1 0 1
1 0 0 1 0 1 0 1
1 0 0 1 1 1 0 1
1 1 1 1 0 0 0 1
0 0 0 1 1 1 0 1
this sample input gives the following output:
--input--
1 1 1 1 1 1 1 1
0 0 0 0 0 0 0 1
1 0 0 0 0 1 0 1
1 0 0 1 0 1 0 1
1 0 0 1 0 1 0 1
1 0 0 1 1 1 0 1
1 1 1 1 0 0 0 1
0 0 0 1 1 1 0 1
--output--
1 1 1 1 1 1 1 1
0 0 0 0 0 0 0 1
2 0 0 0 0 2 0 1
2 0 0 2 0 2 0 1
2 0 0 2 0 2 0 1
2 0 0 2 2 2 0 1
2 2 2 2 0 0 0 1
0 0 0 2 2 2 0 1
HK reports 2 clusters found
*/
int main(int argc, char** argv)
{
int m, n;
int** matrix;
/* Read in the matrix from standard input
The whitespace-deliminated matrix input is preceeded
by the number of rows and number of columns */
while (2 == scanf_s("%d %d", &m, &n))
{ // m = rows, n = columns
matrix = (int**)calloc(m, sizeof(int*));
for (int i = 0; i < m; i++)
{
matrix[i] = (int*)calloc(n, sizeof(int));
for (int j = 0; j < n; j++)
scanf_s("%d", &(matrix[i][j]));
}
printf_s(" --input-- \n");
print_matrix(matrix, m, n);
printf(" --output-- \n");
/* Process the matrix */
int clusters = hoshen_kopelman(matrix, m, n);
/* Output the result */
print_matrix(matrix, m, n);
check_labelling(matrix, m, n);
printf("HK reports %d clusters found\n", clusters);
for (int i = 0; i < m; i++)
free(matrix[i]);
free(matrix);
}
return 0;
}
我尝试按如下所述更改函数 hoshen_kopelman
,但我仍然得到 2 个对象而不是 1 个:
int hoshen_kopelman(int** matrix, int m, int n)
{
uf_initialize(m * n / 2);
/* scan the matrix */
for (int y = 0; y < m; y++)
{
for (int x = 0; x < n; x++)
{
if (matrix[y][x])
{ // if occupied ...
int up = (y == 0 ? 0 : matrix[y - 1][x]); // look up
int left = (x == 0 ? 0 : matrix[y][x - 1]); // look left
// ----------- THE NEW CODE -------------
if (x > 0)
{
if (up == 0 && y > 0) // left+up
up = matrix[y - 1][x - 1];
if (left == 0 && y < m - 1) // left+down
left = matrix[y + 1][x - 1];
}
// ---------- END NEW CODE --------------
switch (!!up + !!left)
{
case 0:
matrix[y][x] = uf_make_set(); // a new cluster
break;
case 1: // part of an existing cluster
matrix[y][x] = max(up, left); // whichever is nonzero is labelled
break;
case 2: // this site binds two clusters
matrix[y][x] = uf_union(up, left);
break;
}
}
}
}
/* apply the relabeling to the matrix */
/* This is a little bit sneaky.. we create a mapping from the canonical labels
determined by union/find into a new set of canonical labels, which are
guaranteed to be sequential. */
int* new_labels = calloc(sizeof(int), n_labels); // allocate array, initialized to zero
for (int i = 0; i < m; i++)
for (int j = 0; j < n; j++)
if (matrix[i][j])
{
int x = uf_find(matrix[i][j]);
if (new_labels[x] == 0)
{
new_labels[0]++;
new_labels[x] = new_labels[0];
}
matrix[i][j] = new_labels[x];
}
int total_clusters = new_labels[0];
free(new_labels);
uf_done();
return total_clusters;
}
现在得到如下输出(我期待1而得到2):
4 5
1 0 1 0 1
0 1 0 1 0
1 0 1 0 0
0 0 1 0 0
--input--
1 0 1 0 1
0 1 0 1 0
1 0 1 0 0
0 0 1 0 0
--output--
1 0 1 0 1
0 1 0 1 0
2 0 1 0 0
0 0 1 0 0
HK reports 2 clusters found
更正代码以检查所有 8 个邻居的正确方法是什么?
我让你误入歧途说要检查左下方。该算法依赖于它正在检查的当前节点在它检查的所有邻居之后。所以你需要检查左、上、左上和右上。您可以使用它代替您的新代码:
if (y > 0)
{
if (left == 0 && x > 0) // left+up
left = matrix[y - 1][x - 1];
if (up == 0 && x < n-1) // right+up
up = matrix[y - 1][x + 1];
}
我找到了 here Hoshen-Kopelman 算法的实现,但它只检查左上角的邻居,这意味着对角线连接不被视为连接。
如何改进此代码,使对角线连接也被视为连接?
在下面的示例中,我希望有 1 个对象而不是 7 个对象:
4 5
1 0 1 0 1
0 1 0 1 0
1 0 1 0 0
0 0 1 0 0
--input--
1 0 1 0 1
0 1 0 1 0
1 0 1 0 0
0 0 1 0 0
--output--
1 0 2 0 3
0 4 0 5 0
6 0 7 0 0
0 0 7 0 0
HK reports 7 clusters found
这是实现(可以找到完整代码here):
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
/* Implementation of Union-Find Algorithm */
/* The 'labels' array has the meaning that labels[x] is an alias for the label x; by
following this chain until x == labels[x], you can find the canonical name of an
equivalence class. The labels start at one; labels[0] is a special value indicating
the highest label already used. */
int* labels;
int n_labels = 0; /* length of the labels array */
/* uf_find returns the canonical label for the equivalence class containing x */
int uf_find(int x)
{
int y = x;
while (labels[y] != y)
y = labels[y];
while (labels[x] != x)
{
int z = labels[x];
labels[x] = y;
x = z;
}
return y;
}
/* uf_union joins two equivalence classes and returns the canonical label of the resulting class. */
int uf_union(int x, int y)
{
return labels[uf_find(x)] = uf_find(y);
}
/* uf_make_set creates a new equivalence class and returns its label */
int uf_make_set(void)
{
labels[0] ++;
assert(labels[0] < n_labels);
labels[labels[0]] = labels[0];
return labels[0];
}
/* uf_intitialize sets up the data structures needed by the union-find implementation. */
void uf_initialize(int max_labels)
{
n_labels = max_labels;
labels = calloc(sizeof(int), n_labels);
labels[0] = 0;
}
/* uf_done frees the memory used by the union-find data structures */
void uf_done(void)
{
n_labels = 0;
free(labels);
labels = 0;
}
/* End Union-Find implementation */
#define max(a,b) (a>b?a:b)
#define min(a,b) (a>b?b:a)
/* print_matrix prints out a matrix that is set up in the "pointer to pointers" scheme
(aka, an array of arrays); this is incompatible with C's usual representation of 2D
arrays, but allows for 2D arrays with dimensions determined at run-time */
void print_matrix(int** matrix, int m, int n)
{
for (int i = 0; i < m; i++)
{
for (int j = 0; j < n; j++)
printf("%3d ", matrix[i][j]);
printf("\n");
}
}
/* Label the clusters in "matrix". Return the total number of clusters found. */
int hoshen_kopelman(int** matrix, int m, int n)
{
uf_initialize(m * n / 2);
/* scan the matrix */
for (int y = 0; y < m; y++)
{
for (int x = 0; x < n; x++)
{
if (matrix[y][x])
{ // if occupied ...
int up = (y == 0 ? 0 : matrix[y - 1][x]); // look up
int left = (x == 0 ? 0 : matrix[y][x - 1]); // look left
switch (!!up + !!left)
{
case 0:
matrix[y][x] = uf_make_set(); // a new cluster
break;
case 1: // part of an existing cluster
matrix[y][x] = max(up, left); // whichever is nonzero is labelled
break;
case 2: // this site binds two clusters
matrix[y][x] = uf_union(up, left);
break;
}
}
}
}
/* apply the relabeling to the matrix */
/* This is a little bit sneaky.. we create a mapping from the canonical labels
determined by union/find into a new set of canonical labels, which are
guaranteed to be sequential. */
int* new_labels = calloc(sizeof(int), n_labels); // allocate array, initialized to zero
for (int i = 0; i < m; i++)
for (int j = 0; j < n; j++)
if (matrix[i][j])
{
int x = uf_find(matrix[i][j]);
if (new_labels[x] == 0)
{
new_labels[0]++;
new_labels[x] = new_labels[0];
}
matrix[i][j] = new_labels[x];
}
int total_clusters = new_labels[0];
free(new_labels);
uf_done();
return total_clusters;
}
/* This procedure checks to see that any occupied neighbors of an occupied site
have the same label. */
void check_labelling(int** matrix, int m, int n)
{
int N, S, E, W;
for (int i = 0; i < m; i++)
for (int j = 0; j < n; j++)
if (matrix[i][j])
{
N = (i == 0 ? 0 : matrix[i - 1][j]);
S = (i == m - 1 ? 0 : matrix[i + 1][j]);
E = (j == n - 1 ? 0 : matrix[i][j + 1]);
W = (j == 0 ? 0 : matrix[i][j - 1]);
assert(N == 0 || matrix[i][j] == N);
assert(S == 0 || matrix[i][j] == S);
assert(E == 0 || matrix[i][j] == E);
assert(W == 0 || matrix[i][j] == W);
}
}
/* The sample program reads in a matrix from standard input, runs the HK algorithm on
it, and prints out the results. The form of the input is two integers giving the
dimensions of the matrix, followed by the matrix elements (with data separated by
whitespace).
a sample input file is the following:
8 8
1 1 1 1 1 1 1 1
0 0 0 0 0 0 0 1
1 0 0 0 0 1 0 1
1 0 0 1 0 1 0 1
1 0 0 1 0 1 0 1
1 0 0 1 1 1 0 1
1 1 1 1 0 0 0 1
0 0 0 1 1 1 0 1
this sample input gives the following output:
--input--
1 1 1 1 1 1 1 1
0 0 0 0 0 0 0 1
1 0 0 0 0 1 0 1
1 0 0 1 0 1 0 1
1 0 0 1 0 1 0 1
1 0 0 1 1 1 0 1
1 1 1 1 0 0 0 1
0 0 0 1 1 1 0 1
--output--
1 1 1 1 1 1 1 1
0 0 0 0 0 0 0 1
2 0 0 0 0 2 0 1
2 0 0 2 0 2 0 1
2 0 0 2 0 2 0 1
2 0 0 2 2 2 0 1
2 2 2 2 0 0 0 1
0 0 0 2 2 2 0 1
HK reports 2 clusters found
*/
int main(int argc, char** argv)
{
int m, n;
int** matrix;
/* Read in the matrix from standard input
The whitespace-deliminated matrix input is preceeded
by the number of rows and number of columns */
while (2 == scanf_s("%d %d", &m, &n))
{ // m = rows, n = columns
matrix = (int**)calloc(m, sizeof(int*));
for (int i = 0; i < m; i++)
{
matrix[i] = (int*)calloc(n, sizeof(int));
for (int j = 0; j < n; j++)
scanf_s("%d", &(matrix[i][j]));
}
printf_s(" --input-- \n");
print_matrix(matrix, m, n);
printf(" --output-- \n");
/* Process the matrix */
int clusters = hoshen_kopelman(matrix, m, n);
/* Output the result */
print_matrix(matrix, m, n);
check_labelling(matrix, m, n);
printf("HK reports %d clusters found\n", clusters);
for (int i = 0; i < m; i++)
free(matrix[i]);
free(matrix);
}
return 0;
}
我尝试按如下所述更改函数 hoshen_kopelman
,但我仍然得到 2 个对象而不是 1 个:
int hoshen_kopelman(int** matrix, int m, int n)
{
uf_initialize(m * n / 2);
/* scan the matrix */
for (int y = 0; y < m; y++)
{
for (int x = 0; x < n; x++)
{
if (matrix[y][x])
{ // if occupied ...
int up = (y == 0 ? 0 : matrix[y - 1][x]); // look up
int left = (x == 0 ? 0 : matrix[y][x - 1]); // look left
// ----------- THE NEW CODE -------------
if (x > 0)
{
if (up == 0 && y > 0) // left+up
up = matrix[y - 1][x - 1];
if (left == 0 && y < m - 1) // left+down
left = matrix[y + 1][x - 1];
}
// ---------- END NEW CODE --------------
switch (!!up + !!left)
{
case 0:
matrix[y][x] = uf_make_set(); // a new cluster
break;
case 1: // part of an existing cluster
matrix[y][x] = max(up, left); // whichever is nonzero is labelled
break;
case 2: // this site binds two clusters
matrix[y][x] = uf_union(up, left);
break;
}
}
}
}
/* apply the relabeling to the matrix */
/* This is a little bit sneaky.. we create a mapping from the canonical labels
determined by union/find into a new set of canonical labels, which are
guaranteed to be sequential. */
int* new_labels = calloc(sizeof(int), n_labels); // allocate array, initialized to zero
for (int i = 0; i < m; i++)
for (int j = 0; j < n; j++)
if (matrix[i][j])
{
int x = uf_find(matrix[i][j]);
if (new_labels[x] == 0)
{
new_labels[0]++;
new_labels[x] = new_labels[0];
}
matrix[i][j] = new_labels[x];
}
int total_clusters = new_labels[0];
free(new_labels);
uf_done();
return total_clusters;
}
现在得到如下输出(我期待1而得到2):
4 5
1 0 1 0 1
0 1 0 1 0
1 0 1 0 0
0 0 1 0 0
--input--
1 0 1 0 1
0 1 0 1 0
1 0 1 0 0
0 0 1 0 0
--output--
1 0 1 0 1
0 1 0 1 0
2 0 1 0 0
0 0 1 0 0
HK reports 2 clusters found
更正代码以检查所有 8 个邻居的正确方法是什么?
我让你误入歧途说要检查左下方。该算法依赖于它正在检查的当前节点在它检查的所有邻居之后。所以你需要检查左、上、左上和右上。您可以使用它代替您的新代码:
if (y > 0)
{
if (left == 0 && x > 0) // left+up
left = matrix[y - 1][x - 1];
if (up == 0 && x < n-1) // right+up
up = matrix[y - 1][x + 1];
}