对大量数据使用合并排序时出现 TypeError (python)
TypeError when using mergesort on large amounts of data (python)
所以,我想做一个基准测试,比较不同算法在不同大小数组上的处理速度。我有以下脚本,它应该对大小为 10、100、1000、10000、100000、1000000 输入数组使用 mergeSort:
import sys
import time
import random
def merge(arr, l, m, r):
n1 = m - l + 1
n2 = r- m
# create temp arrays
L = [0] * (n1)
R = [0] * (n2)
# Copy data to temp arrays L[] and R[]
for i in range(0 , n1):
L[i] = arr[l + i]
for j in range(0 , n2):
R[j] = arr[m + 1 + j]
# Merge the temp arrays back into arr[l..r]
i = 0 # Initial index of first subarray
j = 0 # Initial index of second subarray
k = l # Initial index of merged subarray
while i < n1 and j < n2 :
if L[i] <= R[j]:
arr[k] = L[i]
i += 1
else:
arr[k] = R[j]
j += 1
k += 1
# Copy the remaining elements of L[], if there
# are any
while i < n1:
arr[k] = L[i]
i += 1
k += 1
# Copy the remaining elements of R[], if there
# are any
while j < n2:
arr[k] = R[j]
j += 1
k += 1
# l is for left index and r is right index of the
# sub-array of arr to be sorted
def mergeSort(arr,l,r):
if l < r:
# Same as (l+r)/2, but avoids overflow for
# large l and h
m = (l+(r-1))/2
# Sort first and second halves
mergeSort(arr, l, m)
mergeSort(arr, m+1, r)
merge(arr, l, m, r)
data = []
L10 = []
L100 = []
L1000 = []
L10000 = []
L100000 = []
L1000000 = []
inf = open("10.txt", "r")
inputData = inf.readlines()
for line in inputData:
L10.append(int(line.rstrip()))
data.append(L10)
inf = open("100.txt", "r")
inputData = inf.readlines()
for line in inputData:
L100.append(int(line.rstrip()))
data.append(L100)
inf = open("1000.txt", "r")
inputData = inf.readlines()
for line in inputData:
L1000.append(int(line.rstrip()))
data.append(L1000)
inf = open("10000.txt", "r")
inputData = inf.readlines()
for line in inputData:
L10000.append(int(line.rstrip()))
data.append(L10000)
inf = open("100000.txt", "r")
inputData = inf.readlines()
for line in inputData:
L100000.append(int(line.rstrip()))
data.append(L100000)
inf = open("1000000.txt", "r")
inputData = inf.readlines()
for line in inputData:
L1000000.append(int(line.rstrip()))
data.append(L1000000)
for numList in data:
start = time.time()
mergeSort(numList, 0, len(numList)-1)
end = time.time()
print("Sort time for {} size list: {}".format(len(numList), end - start))
错误:
Traceback (most recent call last):
File "C:\Users\witcher\Documents\NJIT\CS 288\mergesort.py", line 110, in <module>
mergeSort(numList, 0, len(numList)-1)
File "C:\Users\witcher\Documents\NJIT\CS 288\mergesort.py", line 58, in mergeSort
mergeSort(arr, l, m)
File "C:\Users\witcher\Documents\NJIT\CS 288\mergesort.py", line 58, in mergeSort
mergeSort(arr, l, m)
File "C:\Users\witcher\Documents\NJIT\CS 288\mergesort.py", line 58, in mergeSort
mergeSort(arr, l, m)
File "C:\Users\witcher\Documents\NJIT\CS 288\mergesort.py", line 60, in mergeSort
merge(arr, l, m, r)
File "C:\Users\witcher\Documents\NJIT\CS 288\mergesort.py", line 10, in merge
L = [0] * (n1)
TypeError: can't multiply sequence by non-int of type 'float
我不知道是什么原因导致了这个问题。该算法直接来自教程网站,并且在处理小列表时效果很好。我相信初始函数调用也是正确的。输入数据只是一个文件,其中每一行都是一个随机整数,这是我用来创建这些脚本的脚本:
import math
import random
for num in [10, 100, 1000, 10000, 10000, 100000, 1000000]:
outf = open(str(num)+".txt", "w")
for i in range(num):
outf.write(str(random.randint(1,999))+"\n")
outf.close()
是的,我手动删除了每个文件末尾的额外换行符。感谢任何帮助。
看起来你在 Python 3?我猜错误源于这一行:
m = (l+(r-1))/2
在 Python 3 中,此除法将创建一个浮点数(与常规整数相反——Python 2 中的行为)。如果你还想创建整数,你可以使用:
m = (l+(r-1)) // 2
这是一个楼层划分,会给你一个整数,它应该适用于你的用例。
所以,我想做一个基准测试,比较不同算法在不同大小数组上的处理速度。我有以下脚本,它应该对大小为 10、100、1000、10000、100000、1000000 输入数组使用 mergeSort:
import sys
import time
import random
def merge(arr, l, m, r):
n1 = m - l + 1
n2 = r- m
# create temp arrays
L = [0] * (n1)
R = [0] * (n2)
# Copy data to temp arrays L[] and R[]
for i in range(0 , n1):
L[i] = arr[l + i]
for j in range(0 , n2):
R[j] = arr[m + 1 + j]
# Merge the temp arrays back into arr[l..r]
i = 0 # Initial index of first subarray
j = 0 # Initial index of second subarray
k = l # Initial index of merged subarray
while i < n1 and j < n2 :
if L[i] <= R[j]:
arr[k] = L[i]
i += 1
else:
arr[k] = R[j]
j += 1
k += 1
# Copy the remaining elements of L[], if there
# are any
while i < n1:
arr[k] = L[i]
i += 1
k += 1
# Copy the remaining elements of R[], if there
# are any
while j < n2:
arr[k] = R[j]
j += 1
k += 1
# l is for left index and r is right index of the
# sub-array of arr to be sorted
def mergeSort(arr,l,r):
if l < r:
# Same as (l+r)/2, but avoids overflow for
# large l and h
m = (l+(r-1))/2
# Sort first and second halves
mergeSort(arr, l, m)
mergeSort(arr, m+1, r)
merge(arr, l, m, r)
data = []
L10 = []
L100 = []
L1000 = []
L10000 = []
L100000 = []
L1000000 = []
inf = open("10.txt", "r")
inputData = inf.readlines()
for line in inputData:
L10.append(int(line.rstrip()))
data.append(L10)
inf = open("100.txt", "r")
inputData = inf.readlines()
for line in inputData:
L100.append(int(line.rstrip()))
data.append(L100)
inf = open("1000.txt", "r")
inputData = inf.readlines()
for line in inputData:
L1000.append(int(line.rstrip()))
data.append(L1000)
inf = open("10000.txt", "r")
inputData = inf.readlines()
for line in inputData:
L10000.append(int(line.rstrip()))
data.append(L10000)
inf = open("100000.txt", "r")
inputData = inf.readlines()
for line in inputData:
L100000.append(int(line.rstrip()))
data.append(L100000)
inf = open("1000000.txt", "r")
inputData = inf.readlines()
for line in inputData:
L1000000.append(int(line.rstrip()))
data.append(L1000000)
for numList in data:
start = time.time()
mergeSort(numList, 0, len(numList)-1)
end = time.time()
print("Sort time for {} size list: {}".format(len(numList), end - start))
错误:
Traceback (most recent call last):
File "C:\Users\witcher\Documents\NJIT\CS 288\mergesort.py", line 110, in <module>
mergeSort(numList, 0, len(numList)-1)
File "C:\Users\witcher\Documents\NJIT\CS 288\mergesort.py", line 58, in mergeSort
mergeSort(arr, l, m)
File "C:\Users\witcher\Documents\NJIT\CS 288\mergesort.py", line 58, in mergeSort
mergeSort(arr, l, m)
File "C:\Users\witcher\Documents\NJIT\CS 288\mergesort.py", line 58, in mergeSort
mergeSort(arr, l, m)
File "C:\Users\witcher\Documents\NJIT\CS 288\mergesort.py", line 60, in mergeSort
merge(arr, l, m, r)
File "C:\Users\witcher\Documents\NJIT\CS 288\mergesort.py", line 10, in merge
L = [0] * (n1)
TypeError: can't multiply sequence by non-int of type 'float
我不知道是什么原因导致了这个问题。该算法直接来自教程网站,并且在处理小列表时效果很好。我相信初始函数调用也是正确的。输入数据只是一个文件,其中每一行都是一个随机整数,这是我用来创建这些脚本的脚本:
import math
import random
for num in [10, 100, 1000, 10000, 10000, 100000, 1000000]:
outf = open(str(num)+".txt", "w")
for i in range(num):
outf.write(str(random.randint(1,999))+"\n")
outf.close()
是的,我手动删除了每个文件末尾的额外换行符。感谢任何帮助。
看起来你在 Python 3?我猜错误源于这一行:
m = (l+(r-1))/2
在 Python 3 中,此除法将创建一个浮点数(与常规整数相反——Python 2 中的行为)。如果你还想创建整数,你可以使用:
m = (l+(r-1)) // 2
这是一个楼层划分,会给你一个整数,它应该适用于你的用例。