C - 从函数返回指针后未设置值是结构指针
C - values is struct pointer are not set after pointer is returned from function
我正在尝试将我在 Java 中编写的 kd-tree 上的 knn(k 最近邻搜索)移植到 C。
Java 输出,如预期:
Nearest to Key: 6.0,5.0,4.0
Key:6.0,5.0,4.0,min distance:0.0
Key:5.0,4.0,3.0,min distance:3.0
Key:7.0,6.0,5.0,min distance:3.0
Key:4.0,3.0,2.0,min distance:12.0
Key:3.0,2.0,1.0,min distance:27.0
Java 代码,class(这是一个快速实现,只是为了在我开始移植之前让算法工作):
class kd_tree {
public int DEFAULT_NUMBER_OF_DIMENSIONS = 1;
static int current_number_of_data_points = 0;
static int current_global_median = 0;
kd_tree left;
kd_tree right;
float[] data;
private int k_dimensions;
float distance_to_neighbor;
}
Java knn方法:
/*===========================================================================
Function knn, knn algorithm using kd-tree & minimumNeighbor function.
Description:
==========================================================*/
public static kd_tree[] knn( kd_tree root
, float[] data_point
, int depth
, int k_dimension
, int number_of_nearest_neighbors) {
kd_tree[] all_nearests = new kd_tree[current_number_of_data_points];
kd_tree[] n_nearests = new kd_tree[current_number_of_data_points];
if (root != null) {
int nearests_counter = 0;
/* now lets traverse the tree inorder & calculate distances from the
query point based on Morris Traversal algorithm that does not
use any stacks or no recursion */
kd_tree cur = root;
kd_tree pre;
while (cur != null) {
if (cur.left == null) {
/* debugging System.out.println(cur.data[0]);
calculate distance */
cur.distance_to_neighbor = n_dimensional_euclidean(data_point, cur.data);
all_nearests[nearests_counter] = cur;
nearests_counter++;
cur = cur.right; // move to next right node
} else { // has a left subtree
pre = cur.left;
while (pre.right != null && pre.right != cur) { // find rightmost
pre = pre.right;
}
/* Make current as the right child of its inorder
predecessor */
if (pre.right == null) {
pre.right = cur;
cur = cur.left;
} else {
pre.right = null;
// debugging printf("%d ", current->data);
// calculate distance
cur.distance_to_neighbor = n_dimensional_euclidean( data_point, cur.data);
all_nearests[nearests_counter] = cur;
nearests_counter++;
cur = cur.right;
}
}
}//end while
// base cases
// sort from least to greatest
insertion_sort_based_on_distance(all_nearests);
// return on specified number_of_nearest_neighbors
for (int i = 0; i < number_of_nearest_neighbors; i++) {
n_nearests[i]=all_nearests[i];
}
}
return n_nearests;
}
相关的C代码片段:
#include <stdlib.h>
#ifndef KDTREE_H
#define KDTREE_H
/*
* Representation of a kd tree
*/
typedef struct tree_ {
struct tree* left;
struct tree* right;
float* info;
float distance_to_neighbor;
} tree;
// pre-allocated tree nodes array
static tree tree_space[KD_TREE_HEAP_SIZE];
// the knn algorithm will require memory space upto tree size
static tree* processing_space [KD_TREE_HEAP_SIZE];
tree* knn( tree* root
, float data_point[]
, int depth
, const int k_dimensions
, int number_of_nearest_neighbors
, int total_number_of_elements
, tree* n_nearests);
相关knn实现,kdtree.c:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <float.h>
#include "kdtree.h"
#include "sorting_utility.h"
static int current_number_of_kd_tree_nodes = 0;
static int current_number_of_data_points = 0;
/*===========================================================================
Function knn, knn algorithm using kd-tree.
Description:
==========================================================*/
tree* knn( tree* root
, float data_point[]
, int depth
, const int k_dimensions
, int number_of_nearest_neighbors
, tree* n_nearests)
{
tree all_nearests[current_number_of_kd_tree_nodes];
tree* source_data_end_ptr = NULL;
tree* source_data_start_ptr = NULL;
tree* destination_data_start_ptr = NULL;
tree* destination_data_end_ptr = NULL;
if(NULL != root && NULL != n_nearests)
{
int nearests_counter = 0;
// now lets traverse the tree inorder & calculate distances
// from the query point
tree* cur = root;
tree* pre;
while(NULL != cur)
{
if(NULL == cur->left)
{
cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
processing_space[nearests_counter] = *cur;
nearests_counter++;
cur = cur->right; // move to next right node
}
else
{
pre = cur->left;
while(pre->right != NULL && pre->right != cur)
{ // find rightmost
pre = pre->right;
}
/* Make current as the right child of its inorder predecessor */
if(pre->right == NULL)
{
pre->right = cur;
cur = cur->left;
}
else
{
pre->right = NULL;
// calculate distance
cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
processing_space[nearests_counter] = *cur;
nearests_counter++;
cur = cur->right;
}
}
} // end while
// JUST FOR DEBUGGING START
printf ("***For debugging before sort:\n");
for(int i = 0; i < current_number_of_kd_tree_nodes; i++)
{
printf("{");
for(int c = 0; c < k_dimensions; c++)
{
if(NULL != processing_space[i].info)
{
printf("%f,", processing_space[i].info[c]);
}
else
{
break;
}
} // end for
printf("} ");
printf("min_distance=%f\n", processing_space[i].distance_to_neighbor);
} // end for
// JUST FOR DEBUGGING END
/* copy relevant range up current_number_of_kd_tree_nodes before sort,
* in order to avoid sorting beyond range that does not have any data */
source_data_start_ptr = &processing_space[0];
destination_data_start_ptr = &all_nearests[0];
destination_data_end_ptr = &all_nearests[current_number_of_kd_tree_nodes - 1];
while(destination_data_start_ptr <= destination_data_end_ptr)
{
*destination_data_start_ptr = *source_data_start_ptr;
source_data_start_ptr++;
destination_data_start_ptr++;
}
// sort based on distance from query point
quick_sort(all_nearests, 0, current_number_of_kd_tree_nodes - 1);
// JUST FOR DEBUGGING START
printf("***For debugging after sort\n");
for (int i = 0; i < current_number_of_kd_tree_nodes; i++)
{
printf("{");
for (int c = 0; c < k_dimensions; c++)
{
if (NULL != all_nearests[i].info)
{
printf ("%f,", all_nearests[i].info[c]);
}
else
{
break;
}
} // end for
printf("} ");
printf("min_distance=%f\n", all_nearests[i].distance_to_neighbor);
} // end for
// JUST FOR DEBUGGING END
/* copy only the n_nearest & ignore/ do NOT copy any empty tree nodes */
// reuse pointers
destination_data_end_ptr = &n_nearests[number_of_nearest_neighbors - 1];
source_data_end_ptr = &all_nearests[current_number_of_kd_tree_nodes - 1];
source_data_start_ptr = all_nearests;
int counter = 0;
while(counter < number_of_nearest_neighbors && source_data_start_ptr < source_data_end_ptr)
{
// do NOT copy any empty tree nodes
if(source_data_start_ptr != NULL && source_data_start_ptr->info != NULL)
{
/* ATTENTION: i checked with debugger & values for
(distance_to_neighbor,info,left,right were not zeroed or empty */
float distance_to_neighbor = source_data_start_ptr->distance_to_neighbor;
float* info = source_data_start_ptr->info;
tree* left = source_data_start_ptr->left;
tree* right = source_data_start_ptr->right;
n_nearests[counter].distance_to_neighbor = distance_to_neighbor;
n_nearests[counter].info = info;
n_nearests[counter].left = left;
n_nearests[counter].right = right;
counter++;
}
source_data_start_ptr++;
}
}
else
{
printf("Error, knn input parameter error");
}
return n_nearests;
} // end function
main.c的相关片段:
#include<kdtree.h>
int main()
{
const int query_size = 10;
const int k_dimensions = 3;
printf("knn (k nearest neighboor)\n:");
tree* n_nearests[query_size];
printf("%Nearest to Key: {%f,%f,%f}\n", query_size,query_point[0], query_point[1], query_point[2]);
knn(root, query_point, 0, k_dimensions, query_size, KD_TREE_HEAP_SIZE, n_nearests);
// print n nearest neighbors
tree* tree_ptr = &n_nearests[0];
for(int i = 0; i <query_size; i++)
{
if(NULL != tree_ptr->info)
{
printf("Key={");
for(int c=0; c < k_dimensions; c++)
{
printf("%d,", tree_ptr->info[c]);
}
printf("} ");
printf("%f min distance\n", tree_ptr->distance_to_neighbor);
}
}
return 0;
} // end main
C版输出:
knn (k nearest neighboor)
:5 Nearest neighbors to Key: {6.000000,5.000000,4.000000} are:
***For debugging before sort:
{-1.000000,-2.000000,-3.000000,} min_distance=49.000000
{0.000000,-1.000000,-2.000000,} min_distance=36.000000
{1.000000,0.000000,-1.000000,} min_distance=25.000000
{2.000000,1.000000,0.000000,} min_distance=16.000000
{3.000000,2.000000,1.000000,} min_distance=9.000000
{4.000000,3.000000,2.000000,} min_distance=4.000000
{5.000000,4.000000,3.000000,} min_distance=1.000000
{6.000000,5.000000,4.000000,} min_distance=0.000000
{7.000000,6.000000,5.000000,} min_distance=1.000000
{14.000000,13.000000,12.000000,} min_distance=64.000000
{15.000000,14.000000,13.000000,} min_distance=81.000000
{16.000000,15.000000,14.000000,} min_distance=100.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
***For debugging after sort
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{6.000000,5.000000,4.000000,} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{5.000000,4.000000,3.000000,} min_distance=1.000000
{7.000000,6.000000,5.000000,} min_distance=1.000000
{4.000000,3.000000,2.000000,} min_distance=4.000000
{3.000000,2.000000,1.000000,} min_distance=9.000000
{2.000000,1.000000,0.000000,} min_distance=16.000000
{1.000000,0.000000,-1.000000,} min_distance=25.000000
{0.000000,-1.000000,-2.000000,} min_distance=36.000000
{-1.000000,-2.000000,-3.000000,} min_distance=49.000000
{14.000000,13.000000,12.000000,} min_distance=64.000000
{15.000000,14.000000,13.000000,} min_distance=81.000000
{16.000000,15.000000,14.000000,} min_distance=100.000000
**After function return:**
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
我已经在 Java 和 C 版本中测试了插入、中序遍历、搜索、删除和查找最小邻居,它们都按预期工作。
在 C 版本中,knn 函数 returns 在函数 return?
之后归零值
为什么 struct 中的值在调用 main 函数后为零(请参阅标题为 "After function return" 的输出区域?
希望其他人能发现一些明显的东西,真的很绝望地拔掉我的头发。
存在多个问题:
n_nearest
在 main
中定义为指向 tree
对象的指针数组。但是您将 knn
的参数定义为指向实际 tree
对象数组的指针,这是不兼容的。参数应定义为 tree **n_nearest
或 tree* n_nearest[]
,这是指向指针数组的指针。然后,您应该只存储对树元素的引用,而不是复制值。类似地,all_nearests
应该被定义为一个指针数组,就像在 java 中一样,而不是一个对象数组来存储树节点的副本。
- 编译器应该针对这种类型不匹配发出诊断。这不是良性错误,代码确实有未定义的行为,这可能解释了不正确的输出。始终为 C 编译器启用额外警告以检测可能不正确的代码:
gcc -Wall -Werror
或 clang -Weverything -Werror
是一个好的开始,可以节省无数小时的挫败感。
- java 类型
kd_tree
可以在 C 中定义为 typedef tree *kd_tree;
但 C 程序员发现将指针隐藏在 typedef 后面会造成混淆,其中 java 程序员为除了标量值之外的所有内容都没有问题,因为数组和 类 从不包含实际结构,只包含指向对象的指针。
- 如果使用全局
processing_space
,可以避免临时数组all_nearests
的动态分配,临时数组all_nearests
在java中是从堆上分配的,在C中是在栈上分配的。请注意,在这种情况下您不需要传递 total_number_of_elements
,因为假定 processing_space
足够大以保存对所有树节点的引用。
- 您没有 return 将元素的数量存储到目标数组中。数量可以少于请求的数量。 java 中也存在此问题,您将
n_nearest
和 all_nearests
分配为大小为 current_number_of_data_points
的 kd_tree
个对象的数组,其中许多将是 [=32] =].
- 事实上,您必须在您没有post的排序函数中测试这些空指针,如果您传递正确数量的元素进行排序,这是不必要的。
main
中的循环不会递增 tree_ptr
,因此它总是打印 n_nearests
的第一个元素。
- 在 C 版本中使用指针比使用数组符号的可读性差,并且不能确保更好的性能。 C 编译器在删除由数组偏移计算产生的公共子表达式方面非常有效。 C 代码会更接近 java 代码。
这是一个使用结构指针数组的修改版本,更接近 java 版本:
#ifndef KDTREE_H
#define KDTREE_H
/*
* Representation of a kd tree
*/
typedef struct tree {
struct tree* left;
struct tree* right;
float* info;
float distance_to_neighbor;
} tree;
// pre-allocated tree nodes array
extern tree tree_space[KD_TREE_HEAP_SIZE];
// the knn algorithm will require memory space upto tree size
extern tree* processing_space[KD_TREE_HEAP_SIZE];
// return the number of closest neighbors, <= number_of_nearest_neighbors
int knn(tree* root,
float data_point[],
int depth,
const int k_dimensions,
int number_of_nearest_neighbors,
int total_number_of_elements,
tree* n_nearests);
knn
的实施:
#include <stdio.h>
#include <stdlib.h>
#include "kdtree.h"
static int current_number_of_kd_tree_nodes = 0;
static int current_number_of_data_points = 0;
static int tree_compare_distance(const void *a, const void *b) {
tree* ap = *(tree* const *)a;
tree* bp = *(tree* const *)b;
return ((ap->distance_to_neighbor > bp->distance_to_neighbor) -
(ap->distance_to_neighbor < bp->distance_to_neighbor));
}
static void quick_sort_based_on_distance(tree** array, size_t size) {
qsort(array, size, sizeof(*array), tree_compare_distance);
}
/*===========================================================================
Function knn, knn algorithm using kd-tree.
Description:
==========================================================*/
int knn(tree* root,
float data_point[],
int depth,
const int k_dimensions,
int number_of_nearest_neighbors,
tree** n_nearests)
{
/* use the static processing space to avoid defining a potentially huge
array on the stack */
tree** all_nearests = processing_space;
if (root != NULL && n_nearests != NULL) {
int nearests_counter = 0;
// now lets traverse the tree inorder & calculate distances
// from the query point
tree* cur = root;
tree* pre;
while (cur != NULL) {
if (cur->left == NULL) {
// calculate distance
cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
all_nearests[nearests_counter] = cur;
nearests_counter++;
cur = cur->right; // move to next right node
} else { // has a left subtree
pre = cur->left;
while (pre->right != NULL && pre->right != cur) { // find rightmost
pre = pre->right;
}
/* Make current as the right child of its inorder predecessor */
if (pre->right == NULL) {
pre->right = cur;
cur = cur->left;
} else {
pre->right = NULL;
// calculate distance
cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
all_nearests[nearests_counter] = cur;
nearests_counter++;
cur = cur->right;
}
}
}
// JUST FOR DEBUGGING START
printf ("***For debugging before sort:\n");
for (int i = 0; i < nearests_counter; i++) {
printf("{");
for (int c = 0; c < k_dimensions; c++) {
printf("%f,", all_nearests[i]->info[c]);
}
printf("} ");
printf("min_distance=%f\n", all_nearests[i]->distance_to_neighbor);
}
// JUST FOR DEBUGGING END
// sort based on distance from query point
quick_sort_based_on_distance(all_nearests, nearests_counter);
// JUST FOR DEBUGGING START
printf("***For debugging after sort\n");
for (int i = 0; i < nearests_counter; i++) {
printf("{");
for (int c = 0; c < k_dimensions; c++) {
printf("%f,", all_nearests[i]->info[c]);
}
printf("} ");
printf("min_distance=%f\n", all_nearests[i]->distance_to_neighbor);
}
// JUST FOR DEBUGGING END
// return only specified number_of_nearest_neighbors
// yet do not return elements beyond nearest_counter
if (number_of_nearest_neighbors > nearest_counter)
number_of_nearest_neighbors = nearest_counter;
for (int i = 0; i < number_of_nearest_neighbors; i++) {
n_nearests[i] = all_nearests[i];
}
return number_of_nearest_neighbors;
} else {
printf("Error, knn input parameter error");
return -1;
}
}
The main code is modified too:
```c
#include <stdio.h>
#include "kdtree.h"
int main() {
const int query_size = 10;
const int k_dimensions = 3;
// ...
// get the values and the query point
// ...
printf("knn (k nearest neighboor)\n:");
tree* n_nearests[query_size];
printf("%d nearest neighbors to Key: {%f,%f,%f}\n", query_size,
query_point[0], query_point[1], query_point[2]);
int result_size = knn(root, query_point, 0, k_dimensions, query_size, n_nearests);
// print the computed nearest neighbors
for (int i = 0; i < result_size; i++) {
tree* tree_ptr = n_nearests[i];
if (tree_ptr->info != NULL) {
printf("Key={");
for (int c = 0; c < k_dimensions; c++) {
printf("%s%d", "," + !c, tree_ptr->info[c]);
}
printf("} ");
printf("%f min distance\n", tree_ptr->distance_to_neighbor);
}
}
return 0;
}
我正在尝试将我在 Java 中编写的 kd-tree 上的 knn(k 最近邻搜索)移植到 C。
Java 输出,如预期:
Nearest to Key: 6.0,5.0,4.0
Key:6.0,5.0,4.0,min distance:0.0
Key:5.0,4.0,3.0,min distance:3.0
Key:7.0,6.0,5.0,min distance:3.0
Key:4.0,3.0,2.0,min distance:12.0
Key:3.0,2.0,1.0,min distance:27.0
Java 代码,class(这是一个快速实现,只是为了在我开始移植之前让算法工作):
class kd_tree {
public int DEFAULT_NUMBER_OF_DIMENSIONS = 1;
static int current_number_of_data_points = 0;
static int current_global_median = 0;
kd_tree left;
kd_tree right;
float[] data;
private int k_dimensions;
float distance_to_neighbor;
}
Java knn方法:
/*===========================================================================
Function knn, knn algorithm using kd-tree & minimumNeighbor function.
Description:
==========================================================*/
public static kd_tree[] knn( kd_tree root
, float[] data_point
, int depth
, int k_dimension
, int number_of_nearest_neighbors) {
kd_tree[] all_nearests = new kd_tree[current_number_of_data_points];
kd_tree[] n_nearests = new kd_tree[current_number_of_data_points];
if (root != null) {
int nearests_counter = 0;
/* now lets traverse the tree inorder & calculate distances from the
query point based on Morris Traversal algorithm that does not
use any stacks or no recursion */
kd_tree cur = root;
kd_tree pre;
while (cur != null) {
if (cur.left == null) {
/* debugging System.out.println(cur.data[0]);
calculate distance */
cur.distance_to_neighbor = n_dimensional_euclidean(data_point, cur.data);
all_nearests[nearests_counter] = cur;
nearests_counter++;
cur = cur.right; // move to next right node
} else { // has a left subtree
pre = cur.left;
while (pre.right != null && pre.right != cur) { // find rightmost
pre = pre.right;
}
/* Make current as the right child of its inorder
predecessor */
if (pre.right == null) {
pre.right = cur;
cur = cur.left;
} else {
pre.right = null;
// debugging printf("%d ", current->data);
// calculate distance
cur.distance_to_neighbor = n_dimensional_euclidean( data_point, cur.data);
all_nearests[nearests_counter] = cur;
nearests_counter++;
cur = cur.right;
}
}
}//end while
// base cases
// sort from least to greatest
insertion_sort_based_on_distance(all_nearests);
// return on specified number_of_nearest_neighbors
for (int i = 0; i < number_of_nearest_neighbors; i++) {
n_nearests[i]=all_nearests[i];
}
}
return n_nearests;
}
相关的C代码片段:
#include <stdlib.h>
#ifndef KDTREE_H
#define KDTREE_H
/*
* Representation of a kd tree
*/
typedef struct tree_ {
struct tree* left;
struct tree* right;
float* info;
float distance_to_neighbor;
} tree;
// pre-allocated tree nodes array
static tree tree_space[KD_TREE_HEAP_SIZE];
// the knn algorithm will require memory space upto tree size
static tree* processing_space [KD_TREE_HEAP_SIZE];
tree* knn( tree* root
, float data_point[]
, int depth
, const int k_dimensions
, int number_of_nearest_neighbors
, int total_number_of_elements
, tree* n_nearests);
相关knn实现,kdtree.c:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <float.h>
#include "kdtree.h"
#include "sorting_utility.h"
static int current_number_of_kd_tree_nodes = 0;
static int current_number_of_data_points = 0;
/*===========================================================================
Function knn, knn algorithm using kd-tree.
Description:
==========================================================*/
tree* knn( tree* root
, float data_point[]
, int depth
, const int k_dimensions
, int number_of_nearest_neighbors
, tree* n_nearests)
{
tree all_nearests[current_number_of_kd_tree_nodes];
tree* source_data_end_ptr = NULL;
tree* source_data_start_ptr = NULL;
tree* destination_data_start_ptr = NULL;
tree* destination_data_end_ptr = NULL;
if(NULL != root && NULL != n_nearests)
{
int nearests_counter = 0;
// now lets traverse the tree inorder & calculate distances
// from the query point
tree* cur = root;
tree* pre;
while(NULL != cur)
{
if(NULL == cur->left)
{
cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
processing_space[nearests_counter] = *cur;
nearests_counter++;
cur = cur->right; // move to next right node
}
else
{
pre = cur->left;
while(pre->right != NULL && pre->right != cur)
{ // find rightmost
pre = pre->right;
}
/* Make current as the right child of its inorder predecessor */
if(pre->right == NULL)
{
pre->right = cur;
cur = cur->left;
}
else
{
pre->right = NULL;
// calculate distance
cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
processing_space[nearests_counter] = *cur;
nearests_counter++;
cur = cur->right;
}
}
} // end while
// JUST FOR DEBUGGING START
printf ("***For debugging before sort:\n");
for(int i = 0; i < current_number_of_kd_tree_nodes; i++)
{
printf("{");
for(int c = 0; c < k_dimensions; c++)
{
if(NULL != processing_space[i].info)
{
printf("%f,", processing_space[i].info[c]);
}
else
{
break;
}
} // end for
printf("} ");
printf("min_distance=%f\n", processing_space[i].distance_to_neighbor);
} // end for
// JUST FOR DEBUGGING END
/* copy relevant range up current_number_of_kd_tree_nodes before sort,
* in order to avoid sorting beyond range that does not have any data */
source_data_start_ptr = &processing_space[0];
destination_data_start_ptr = &all_nearests[0];
destination_data_end_ptr = &all_nearests[current_number_of_kd_tree_nodes - 1];
while(destination_data_start_ptr <= destination_data_end_ptr)
{
*destination_data_start_ptr = *source_data_start_ptr;
source_data_start_ptr++;
destination_data_start_ptr++;
}
// sort based on distance from query point
quick_sort(all_nearests, 0, current_number_of_kd_tree_nodes - 1);
// JUST FOR DEBUGGING START
printf("***For debugging after sort\n");
for (int i = 0; i < current_number_of_kd_tree_nodes; i++)
{
printf("{");
for (int c = 0; c < k_dimensions; c++)
{
if (NULL != all_nearests[i].info)
{
printf ("%f,", all_nearests[i].info[c]);
}
else
{
break;
}
} // end for
printf("} ");
printf("min_distance=%f\n", all_nearests[i].distance_to_neighbor);
} // end for
// JUST FOR DEBUGGING END
/* copy only the n_nearest & ignore/ do NOT copy any empty tree nodes */
// reuse pointers
destination_data_end_ptr = &n_nearests[number_of_nearest_neighbors - 1];
source_data_end_ptr = &all_nearests[current_number_of_kd_tree_nodes - 1];
source_data_start_ptr = all_nearests;
int counter = 0;
while(counter < number_of_nearest_neighbors && source_data_start_ptr < source_data_end_ptr)
{
// do NOT copy any empty tree nodes
if(source_data_start_ptr != NULL && source_data_start_ptr->info != NULL)
{
/* ATTENTION: i checked with debugger & values for
(distance_to_neighbor,info,left,right were not zeroed or empty */
float distance_to_neighbor = source_data_start_ptr->distance_to_neighbor;
float* info = source_data_start_ptr->info;
tree* left = source_data_start_ptr->left;
tree* right = source_data_start_ptr->right;
n_nearests[counter].distance_to_neighbor = distance_to_neighbor;
n_nearests[counter].info = info;
n_nearests[counter].left = left;
n_nearests[counter].right = right;
counter++;
}
source_data_start_ptr++;
}
}
else
{
printf("Error, knn input parameter error");
}
return n_nearests;
} // end function
main.c的相关片段:
#include<kdtree.h>
int main()
{
const int query_size = 10;
const int k_dimensions = 3;
printf("knn (k nearest neighboor)\n:");
tree* n_nearests[query_size];
printf("%Nearest to Key: {%f,%f,%f}\n", query_size,query_point[0], query_point[1], query_point[2]);
knn(root, query_point, 0, k_dimensions, query_size, KD_TREE_HEAP_SIZE, n_nearests);
// print n nearest neighbors
tree* tree_ptr = &n_nearests[0];
for(int i = 0; i <query_size; i++)
{
if(NULL != tree_ptr->info)
{
printf("Key={");
for(int c=0; c < k_dimensions; c++)
{
printf("%d,", tree_ptr->info[c]);
}
printf("} ");
printf("%f min distance\n", tree_ptr->distance_to_neighbor);
}
}
return 0;
} // end main
C版输出:
knn (k nearest neighboor)
:5 Nearest neighbors to Key: {6.000000,5.000000,4.000000} are:
***For debugging before sort:
{-1.000000,-2.000000,-3.000000,} min_distance=49.000000
{0.000000,-1.000000,-2.000000,} min_distance=36.000000
{1.000000,0.000000,-1.000000,} min_distance=25.000000
{2.000000,1.000000,0.000000,} min_distance=16.000000
{3.000000,2.000000,1.000000,} min_distance=9.000000
{4.000000,3.000000,2.000000,} min_distance=4.000000
{5.000000,4.000000,3.000000,} min_distance=1.000000
{6.000000,5.000000,4.000000,} min_distance=0.000000
{7.000000,6.000000,5.000000,} min_distance=1.000000
{14.000000,13.000000,12.000000,} min_distance=64.000000
{15.000000,14.000000,13.000000,} min_distance=81.000000
{16.000000,15.000000,14.000000,} min_distance=100.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
***For debugging after sort
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{6.000000,5.000000,4.000000,} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{5.000000,4.000000,3.000000,} min_distance=1.000000
{7.000000,6.000000,5.000000,} min_distance=1.000000
{4.000000,3.000000,2.000000,} min_distance=4.000000
{3.000000,2.000000,1.000000,} min_distance=9.000000
{2.000000,1.000000,0.000000,} min_distance=16.000000
{1.000000,0.000000,-1.000000,} min_distance=25.000000
{0.000000,-1.000000,-2.000000,} min_distance=36.000000
{-1.000000,-2.000000,-3.000000,} min_distance=49.000000
{14.000000,13.000000,12.000000,} min_distance=64.000000
{15.000000,14.000000,13.000000,} min_distance=81.000000
{16.000000,15.000000,14.000000,} min_distance=100.000000
**After function return:**
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
我已经在 Java 和 C 版本中测试了插入、中序遍历、搜索、删除和查找最小邻居,它们都按预期工作。
在 C 版本中,knn 函数 returns 在函数 return?
之后归零值为什么 struct 中的值在调用 main 函数后为零(请参阅标题为 "After function return" 的输出区域?
希望其他人能发现一些明显的东西,真的很绝望地拔掉我的头发。
存在多个问题:
n_nearest
在main
中定义为指向tree
对象的指针数组。但是您将knn
的参数定义为指向实际tree
对象数组的指针,这是不兼容的。参数应定义为tree **n_nearest
或tree* n_nearest[]
,这是指向指针数组的指针。然后,您应该只存储对树元素的引用,而不是复制值。类似地,all_nearests
应该被定义为一个指针数组,就像在 java 中一样,而不是一个对象数组来存储树节点的副本。- 编译器应该针对这种类型不匹配发出诊断。这不是良性错误,代码确实有未定义的行为,这可能解释了不正确的输出。始终为 C 编译器启用额外警告以检测可能不正确的代码:
gcc -Wall -Werror
或clang -Weverything -Werror
是一个好的开始,可以节省无数小时的挫败感。 - java 类型
kd_tree
可以在 C 中定义为typedef tree *kd_tree;
但 C 程序员发现将指针隐藏在 typedef 后面会造成混淆,其中 java 程序员为除了标量值之外的所有内容都没有问题,因为数组和 类 从不包含实际结构,只包含指向对象的指针。 - 如果使用全局
processing_space
,可以避免临时数组all_nearests
的动态分配,临时数组all_nearests
在java中是从堆上分配的,在C中是在栈上分配的。请注意,在这种情况下您不需要传递total_number_of_elements
,因为假定processing_space
足够大以保存对所有树节点的引用。 - 您没有 return 将元素的数量存储到目标数组中。数量可以少于请求的数量。 java 中也存在此问题,您将
n_nearest
和all_nearests
分配为大小为current_number_of_data_points
的kd_tree
个对象的数组,其中许多将是 [=32] =]. - 事实上,您必须在您没有post的排序函数中测试这些空指针,如果您传递正确数量的元素进行排序,这是不必要的。
main
中的循环不会递增tree_ptr
,因此它总是打印n_nearests
的第一个元素。- 在 C 版本中使用指针比使用数组符号的可读性差,并且不能确保更好的性能。 C 编译器在删除由数组偏移计算产生的公共子表达式方面非常有效。 C 代码会更接近 java 代码。
这是一个使用结构指针数组的修改版本,更接近 java 版本:
#ifndef KDTREE_H
#define KDTREE_H
/*
* Representation of a kd tree
*/
typedef struct tree {
struct tree* left;
struct tree* right;
float* info;
float distance_to_neighbor;
} tree;
// pre-allocated tree nodes array
extern tree tree_space[KD_TREE_HEAP_SIZE];
// the knn algorithm will require memory space upto tree size
extern tree* processing_space[KD_TREE_HEAP_SIZE];
// return the number of closest neighbors, <= number_of_nearest_neighbors
int knn(tree* root,
float data_point[],
int depth,
const int k_dimensions,
int number_of_nearest_neighbors,
int total_number_of_elements,
tree* n_nearests);
knn
的实施:
#include <stdio.h>
#include <stdlib.h>
#include "kdtree.h"
static int current_number_of_kd_tree_nodes = 0;
static int current_number_of_data_points = 0;
static int tree_compare_distance(const void *a, const void *b) {
tree* ap = *(tree* const *)a;
tree* bp = *(tree* const *)b;
return ((ap->distance_to_neighbor > bp->distance_to_neighbor) -
(ap->distance_to_neighbor < bp->distance_to_neighbor));
}
static void quick_sort_based_on_distance(tree** array, size_t size) {
qsort(array, size, sizeof(*array), tree_compare_distance);
}
/*===========================================================================
Function knn, knn algorithm using kd-tree.
Description:
==========================================================*/
int knn(tree* root,
float data_point[],
int depth,
const int k_dimensions,
int number_of_nearest_neighbors,
tree** n_nearests)
{
/* use the static processing space to avoid defining a potentially huge
array on the stack */
tree** all_nearests = processing_space;
if (root != NULL && n_nearests != NULL) {
int nearests_counter = 0;
// now lets traverse the tree inorder & calculate distances
// from the query point
tree* cur = root;
tree* pre;
while (cur != NULL) {
if (cur->left == NULL) {
// calculate distance
cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
all_nearests[nearests_counter] = cur;
nearests_counter++;
cur = cur->right; // move to next right node
} else { // has a left subtree
pre = cur->left;
while (pre->right != NULL && pre->right != cur) { // find rightmost
pre = pre->right;
}
/* Make current as the right child of its inorder predecessor */
if (pre->right == NULL) {
pre->right = cur;
cur = cur->left;
} else {
pre->right = NULL;
// calculate distance
cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
all_nearests[nearests_counter] = cur;
nearests_counter++;
cur = cur->right;
}
}
}
// JUST FOR DEBUGGING START
printf ("***For debugging before sort:\n");
for (int i = 0; i < nearests_counter; i++) {
printf("{");
for (int c = 0; c < k_dimensions; c++) {
printf("%f,", all_nearests[i]->info[c]);
}
printf("} ");
printf("min_distance=%f\n", all_nearests[i]->distance_to_neighbor);
}
// JUST FOR DEBUGGING END
// sort based on distance from query point
quick_sort_based_on_distance(all_nearests, nearests_counter);
// JUST FOR DEBUGGING START
printf("***For debugging after sort\n");
for (int i = 0; i < nearests_counter; i++) {
printf("{");
for (int c = 0; c < k_dimensions; c++) {
printf("%f,", all_nearests[i]->info[c]);
}
printf("} ");
printf("min_distance=%f\n", all_nearests[i]->distance_to_neighbor);
}
// JUST FOR DEBUGGING END
// return only specified number_of_nearest_neighbors
// yet do not return elements beyond nearest_counter
if (number_of_nearest_neighbors > nearest_counter)
number_of_nearest_neighbors = nearest_counter;
for (int i = 0; i < number_of_nearest_neighbors; i++) {
n_nearests[i] = all_nearests[i];
}
return number_of_nearest_neighbors;
} else {
printf("Error, knn input parameter error");
return -1;
}
}
The main code is modified too:
```c
#include <stdio.h>
#include "kdtree.h"
int main() {
const int query_size = 10;
const int k_dimensions = 3;
// ...
// get the values and the query point
// ...
printf("knn (k nearest neighboor)\n:");
tree* n_nearests[query_size];
printf("%d nearest neighbors to Key: {%f,%f,%f}\n", query_size,
query_point[0], query_point[1], query_point[2]);
int result_size = knn(root, query_point, 0, k_dimensions, query_size, n_nearests);
// print the computed nearest neighbors
for (int i = 0; i < result_size; i++) {
tree* tree_ptr = n_nearests[i];
if (tree_ptr->info != NULL) {
printf("Key={");
for (int c = 0; c < k_dimensions; c++) {
printf("%s%d", "," + !c, tree_ptr->info[c]);
}
printf("} ");
printf("%f min distance\n", tree_ptr->distance_to_neighbor);
}
}
return 0;
}