中位数算法的中位数不能始终如一地工作
Median of Medians algorithm not working consistently
我已经实现了 select/median 中位数算法,使用以下作为参考 http://www.ics.uci.edu/~eppstein/161/960130.html (this has previously been linked here Median of Medians in Java)。
我的代码似乎适用于小型数组 (~100),甚至适用于大小为 100001 的数组 http://pastebin.com/mwRc4Hig (answer 5008), but then fails on an input array of size 10001 http://pastebin.com/YwVBmgDk(答案 4960,我的代码输出 4958)。
请注意,以上文本的正确答案相当于对数组进行排序并返回数组[array.length / 2]处的元素,无论数组大小是偶数还是奇数。
我不确定如何调试这个问题。该功能似乎是任意的,我只是迷路了。下面是我的代码:
public class MedianOfMedians {
public static void main(String[] args) {
MedianOfMedians mds = new MedianOfMedians();
mds.run();
}
private void run() {
Scanner in = new Scanner(System.in);
int n = in.nextInt();
int[] numArray = new int[n];
for (int i = 0; i < n; i++) {
numArray[i] = in.nextInt();
}
int median = select(numArray, numArray.length / 2);
System.out.print(median);
}
private int select(int[] numArray, int k) {
if (numArray.length <= 10) {
int[] sorted = insertionSort(numArray);
return sorted[k];
}
int divCount = (numArray.length % 5 == 0) ? numArray.length / 5 - 1 : numArray.length / 5;
int[] medOfMed = new int[divCount + 1];
int counter = 0;
int[] subArray;
while (counter <= divCount) {
subArray = splitByFive(counter, divCount, numArray);
medOfMed[counter] = select(subArray, subArray.length / 2);
counter++;
}
int M = select(medOfMed, numArray.length / 10);
List<Integer> lt = new ArrayList<>();
List<Integer> eq = new ArrayList<>();
List<Integer> gt = new ArrayList<>();
for (int i : numArray) {
if (i < M) {
lt.add(i);
} else if (i == M) {
eq.add(i);
} else {
gt.add(i);
}
}
if (k < lt.size()) {
return select(createArray(lt), k);
} else if (k > lt.size() + eq.size()) {
return select(createArray(gt), k - lt.size() - eq.size());
} else {
return M;
}
}
private int[] splitByFive(int splitIter, int divisions, int[] toSplit) {
int numToCopy;
if (splitIter == divisions) {
numToCopy = toSplit.length - (5 * splitIter);
} else {
numToCopy = 5;
}
int[] subArray = new int[numToCopy];
System.arraycopy(toSplit, splitIter * 5, subArray, 0, numToCopy);
return subArray;
}
private int[] createArray(List<Integer> list) {
int[] result = new int[list.size()];
for (int i = 0; i < list.size(); i++) {
result[i] = list.get(i);
}
return result;
}
private int[] insertionSort(int[] numArray) {
for (int i = 1; i < numArray.length; i++) {
int j = i;
while (j - 1 >= 0 && numArray[j] < numArray[j - 1]) {
int temp = numArray[j];
numArray[j] = numArray[j - 1];
numArray[j - 1] = temp;
j--;
}
}
return numArray;
}
}
我没有时间调试你的代码,但也许我可以提供一种调试技术供你自己尝试,这对像这样的递归算法很有用。
如果存在算法失败的输入(正如您所发现的那样),则存在最小的此类输入——并且此输入越小,就越容易找出问题所在。因为算法是递归的,所以你有一个很好的方法来隔离第一个出错的地方:你可以测试你即将从 select()
return 得到的结果是否正确(使用一个缓慢的,可信的方法,例如将数据复制到临时缓冲区,对其进行排序,然后获取中间元素)只是在return值之前。如果您重新安排函数以仅使用单个 return
语句,则这样做会容易得多,例如:
private int select(int[] numArray, int k) {
int knownCorrectAnswer = selectSlowlyButDefinitelyCorrectly(numArray, k);
int willReturn;
if (numArray.length <= 10) {
int[] sorted = insertionSort(numArray);
willReturn = sorted[k]; // Just remember what we will return
} else { // Need to add else branch here now
...
if (k < lt.size()) {
willReturn = select(createArray(lt), k);
} else if (k > lt.size() + eq.size()) {
willReturn = select(createArray(gt), k - lt.size() - eq.size());
} else {
willReturn = M;
}
} // End of inserted else branch
if (willReturn == knownCorrectAnswer) {
return willReturn;
} else {
yell("First problem occurs with numArray=<...> and k=<...>!");
}
}
yell()
应该打印出整个问题实例并停止程序(例如通过抛出异常)。这个设置的好处是你知道当 yell()
被调用时,每个已经完成的对 select()
的调用都是正确的——因为如果它不是't, yell()
已经被调用并且程序会在此之前停止。因此 yell()
产生的输出保证是出现问题的 第一个 (不一定是最小的,但通常也是最小的)子问题。
我已经实现了 select/median 中位数算法,使用以下作为参考 http://www.ics.uci.edu/~eppstein/161/960130.html (this has previously been linked here Median of Medians in Java)。
我的代码似乎适用于小型数组 (~100),甚至适用于大小为 100001 的数组 http://pastebin.com/mwRc4Hig (answer 5008), but then fails on an input array of size 10001 http://pastebin.com/YwVBmgDk(答案 4960,我的代码输出 4958)。
请注意,以上文本的正确答案相当于对数组进行排序并返回数组[array.length / 2]处的元素,无论数组大小是偶数还是奇数。
我不确定如何调试这个问题。该功能似乎是任意的,我只是迷路了。下面是我的代码:
public class MedianOfMedians {
public static void main(String[] args) {
MedianOfMedians mds = new MedianOfMedians();
mds.run();
}
private void run() {
Scanner in = new Scanner(System.in);
int n = in.nextInt();
int[] numArray = new int[n];
for (int i = 0; i < n; i++) {
numArray[i] = in.nextInt();
}
int median = select(numArray, numArray.length / 2);
System.out.print(median);
}
private int select(int[] numArray, int k) {
if (numArray.length <= 10) {
int[] sorted = insertionSort(numArray);
return sorted[k];
}
int divCount = (numArray.length % 5 == 0) ? numArray.length / 5 - 1 : numArray.length / 5;
int[] medOfMed = new int[divCount + 1];
int counter = 0;
int[] subArray;
while (counter <= divCount) {
subArray = splitByFive(counter, divCount, numArray);
medOfMed[counter] = select(subArray, subArray.length / 2);
counter++;
}
int M = select(medOfMed, numArray.length / 10);
List<Integer> lt = new ArrayList<>();
List<Integer> eq = new ArrayList<>();
List<Integer> gt = new ArrayList<>();
for (int i : numArray) {
if (i < M) {
lt.add(i);
} else if (i == M) {
eq.add(i);
} else {
gt.add(i);
}
}
if (k < lt.size()) {
return select(createArray(lt), k);
} else if (k > lt.size() + eq.size()) {
return select(createArray(gt), k - lt.size() - eq.size());
} else {
return M;
}
}
private int[] splitByFive(int splitIter, int divisions, int[] toSplit) {
int numToCopy;
if (splitIter == divisions) {
numToCopy = toSplit.length - (5 * splitIter);
} else {
numToCopy = 5;
}
int[] subArray = new int[numToCopy];
System.arraycopy(toSplit, splitIter * 5, subArray, 0, numToCopy);
return subArray;
}
private int[] createArray(List<Integer> list) {
int[] result = new int[list.size()];
for (int i = 0; i < list.size(); i++) {
result[i] = list.get(i);
}
return result;
}
private int[] insertionSort(int[] numArray) {
for (int i = 1; i < numArray.length; i++) {
int j = i;
while (j - 1 >= 0 && numArray[j] < numArray[j - 1]) {
int temp = numArray[j];
numArray[j] = numArray[j - 1];
numArray[j - 1] = temp;
j--;
}
}
return numArray;
}
}
我没有时间调试你的代码,但也许我可以提供一种调试技术供你自己尝试,这对像这样的递归算法很有用。
如果存在算法失败的输入(正如您所发现的那样),则存在最小的此类输入——并且此输入越小,就越容易找出问题所在。因为算法是递归的,所以你有一个很好的方法来隔离第一个出错的地方:你可以测试你即将从 select()
return 得到的结果是否正确(使用一个缓慢的,可信的方法,例如将数据复制到临时缓冲区,对其进行排序,然后获取中间元素)只是在return值之前。如果您重新安排函数以仅使用单个 return
语句,则这样做会容易得多,例如:
private int select(int[] numArray, int k) {
int knownCorrectAnswer = selectSlowlyButDefinitelyCorrectly(numArray, k);
int willReturn;
if (numArray.length <= 10) {
int[] sorted = insertionSort(numArray);
willReturn = sorted[k]; // Just remember what we will return
} else { // Need to add else branch here now
...
if (k < lt.size()) {
willReturn = select(createArray(lt), k);
} else if (k > lt.size() + eq.size()) {
willReturn = select(createArray(gt), k - lt.size() - eq.size());
} else {
willReturn = M;
}
} // End of inserted else branch
if (willReturn == knownCorrectAnswer) {
return willReturn;
} else {
yell("First problem occurs with numArray=<...> and k=<...>!");
}
}
yell()
应该打印出整个问题实例并停止程序(例如通过抛出异常)。这个设置的好处是你知道当 yell()
被调用时,每个已经完成的对 select()
的调用都是正确的——因为如果它不是't, yell()
已经被调用并且程序会在此之前停止。因此 yell()
产生的输出保证是出现问题的 第一个 (不一定是最小的,但通常也是最小的)子问题。