创建大量对象（神经元）并使用字典随机连接

Question

我正在尝试根据这些标准创建一种新型神经网络：

每个神经元必须是一个单独的对象。
每个神经元都应该有自己的线程。
网络必须部分随机连接（在启动时）。
神经元必须异步运行，以计算其输出、更新其权重等。

这些是我在 Julia 和 Python 中的实现尝试：

Python

import random
import itertools
import time
import signal
from threading import Thread
from multiprocessing import Pool
import multiprocessing

# Width of the membrane-potential range, in microvolts.
# Resting potential: -70 mV. Membrane potential range: +40 mV to -70 mV.
# Difference: 110 mV = 110000 microvolts.
# Source: https://en.wikipedia.org/wiki/Membrane_potential
POTENTIAL_RANGE = 110000
# Distance from resting potential to action potential, in microvolts.
# Resting potential: -70 mV. Action potential: -55 mV.
# Difference: 15 mV = 15000 microvolts.
# Source: https://faculty.washington.edu/chudler/ap.html
ACTION_POTENTIAL = 15000
# The average number of synapses per neuron: 8,200.
# Source: http://www.ncbi.nlm.nih.gov/pubmed/2778101
AVERAGE_SYNAPSES_PER_NEURON = 8200

# https://en.wikipedia.org/wiki/Neuron

# One node of the network. Every instance appends itself to the class-level
# list `neurons` when it is constructed.
class Neuron():

    # Registry shared by all instances; __init__ appends each new neuron here.
    neurons = []

    # Set up an unconnected neuron, register it, and prepare (but do not start)
    # a thread whose target is activate().
    def __init__(self):
        # Maps id() of a connected neuron to that connection's weight
        # (filled in by fully_connect / partially_connect).
        self.connections = {}
        self.potential = 0.0
        self.error = 0.0
        #self.create_connections()
        #self.create_axon_terminals()
        Neuron.neurons.append(self)
        # The thread object is only created here; the start() call below is
        # commented out.
        self.thread = Thread(target = self.activate)
        #self.thread.start()
        #self.process = multiprocessing.Process(target=self.activate)

    # Connect this neuron to every other neuron found in the slice of the
    # registry that starts at index len(self.connections). Each weight is a
    # uniform random value in [0.1, 1.0] rounded to two decimals.
    def fully_connect(self):
        # Slicing a list builds a new list object on every call.
        for neuron in Neuron.neurons[len(self.connections):]:
            if id(neuron) != id(self):
                self.connections[id(neuron)] = round(random.uniform(0.1, 1.0), 2)

    # Randomly connect this neuron to some of the registered neurons, then
    # print a summary. Does nothing when the neuron already has connections.
    def partially_connect(self):
        if len(self.connections) == 0:
            neuron_count = len(Neuron.neurons)
            # len(self.connections) is 0 inside this branch, so the slice is
            # Neuron.neurons[0:], i.e. a copy of the whole registry.
            for neuron in Neuron.neurons[len(self.connections):]:
                if id(neuron) != id(self):
                    # A link is made when the draw from 1..neuron_count/100
                    # equals 1. The division is between two ints (Python 2
                    # integer division, as the print statements indicate).
                    if random.randint(1,neuron_count/100) == 1:
                        self.connections[id(neuron)] = round(random.uniform(0.1, 1.0), 2)
            print "Neuron ID: " + str(id(self))
            print "    Potential: " + str(self.potential)
            print "    Error: " + str(self.error)
            print "    Connections: " + str(len(self.connections))

    # Thread target: loops forever calling partially_connect(). While
    # self.connections is non-empty, the guard in partially_connect makes each
    # of those calls return without doing any work.
    def activate(self):
        while True:
            # The triple-quoted block below is a bare string statement holding
            # disabled code; it is evaluated as a string and discarded.
            '''
            for dendritic_spine in self.connections:
                if dendritic_spine.axon_terminal is not None:
                    dendritic_spine.potential = dendritic_spine.axon_terminal.potential
                    print dendritic_spine.potential
                self.neuron_potential += dendritic_spine.potential * dendritic_spine.excitement
            terminal_potential = self.neuron_potential / len(self.axon_terminals)
            for axon_terminal in self.axon_terminals:
                axon_terminal.potential = terminal_potential
            '''
            #if len(self.connections) == 0:
            #   self.partially_connect()
            #else:
            self.partially_connect()
            pass

            # Second disabled block, again kept as a bare string statement.
            '''
            if abs(len(Neuron.neurons) - len(self.connections) + 1) > 0:
                self.create_connections()

            if abs(len(Neuron.neurons) - len(self.axon_terminals) + 1) > 0:
                self.create_axon_terminals()
            '''

# Builds a population of Neuron objects and wires them together.
class Supercluster():

    # Create `size` neurons (each one registers itself in Neuron.neurons),
    # report the count, then connect them via build_connections().
    def __init__(self,size):
        for i in range(size):
            Neuron()
        print str(size) + " neurons created."
        # Running count of neurons processed by build_connections().
        self.n = 0
        self.build_connections()
        #pool = Pool(4, self.init_worker)
        #pool.apply_async(self.build_connections(), arguments)
        #map(lambda x: x.partially_connect(),Neuron.neurons)
        #map(lambda x: x.create_connections(),Neuron.neurons)
        #map(lambda x: x.create_axon_terminals(),Neuron.neurons)

    # Visit every registered neuron once, call partially_connect() on it in
    # the calling thread (the per-neuron thread start is commented out), and
    # print the running counter.
    def build_connections(self):
        for neuron in Neuron.neurons:
            self.n += 1
            #neuron.thread.start()
            neuron.partially_connect()
            print "Counter: " + str(self.n)

# Script entry point: build and connect a network of 10000 neurons.
Supercluster(10000)

Julia

# Global registry of every Neuron created so far; the inner constructor below
# appends each new instance to it.
global neurons = []

# A single network node.
#   connections: object_id of a connected neuron => weight of that connection
#   potential:   current potential of the neuron
#   error:       current error value of the neuron
type Neuron
    connections::Dict{UInt64,Float16}
    potential::Float16
    error::Float16

    # Build the neuron from its three field values, register it in the global
    # `neurons` list, and return the new neuron.
    function Neuron(arg1,arg2,arg3)
        self = new(arg1,arg2,arg3)
        push!(neurons, self)
        # `push!` evaluates to the collection it modified, so without this
        # explicit return `Neuron(...)` would yield the registry array instead
        # of the freshly created instance.
        return self
    end

end

# Link `self` to every other neuron in the global `neurons` list. Each link is
# stored under the target's object_id with a random weight rand(1:100)/100.
function fully_connect(self)
    own_id = object_id(self)
    for other in neurons
        other_id = object_id(other)
        # A neuron never connects to itself.
        other_id == own_id && continue
        self.connections[other_id] = rand(1:100)/100
    end
end

# Randomly link `self` to a subset of the neurons in the global `neurons` list
# and print a short summary. A neuron that already has connections is skipped.
function partially_connect(self)
    isempty(self.connections) || return nothing
    neuron_count = length(neurons)
    own_id = object_id(self)
    for other in neurons
        other_id = object_id(other)
        # Skip self; otherwise link only when the random draw comes up 1.
        if other_id != own_id && rand(1:neuron_count/100) == 1
            self.connections[other_id] = rand(1:100)/100
        end
    end
    println("Neuron ID: ",own_id)
    println("    Potential: ",self.potential)
    println("    Error: ",self.error)
    println("    Connections: ",length(self.connections))
end

# Create 10000 neurons (the Neuron constructor appends each one to the global
# `neurons` list), report how many exist, then call partially_connect on each.
function Build()
    for i = 1:10000
        Neuron(Dict(),0.0,0.0)
    end
    println(length(neurons), " neurons created.")
    # Counter printed after each neuron has been processed.
    n = 0
    # NOTE(review): `n` and the neurons are updated inside an @parallel loop;
    # confirm that these updates are visible outside the loop body and that
    # the loop has finished before Build() returns.
    @parallel for neuron in neurons
        n += 1
        partially_connect(neuron)
        println("Counter: ",n)
    end
end

# Script entry point.
Build()

首先，下面这些在神经元之间部分随机地建立连接的代码花费了太多时间。我怎样才能加快这一过程／这部分代码？

Python

# Excerpt of Supercluster.build_connections: visit every registered neuron
# once, call partially_connect() on it, and print the running counter.
def build_connections(self):
    for neuron in Neuron.neurons:
        self.n += 1
        #neuron.thread.start()
        neuron.partially_connect()
        print "Counter: " + str(self.n)

Julia

# Excerpt of the loop in Build(): call partially_connect on every neuron and
# print a counter after each one.
n = 0
@parallel for neuron in neurons
    n += 1
    partially_connect(neuron)
    println("Counter: ",n)
end

其次，我的目标是创建至少一百万个神经元，在这种情况下为每个神经元提供自己的线程是否是个好主意？这意味着会有大约一百万个线程。

这里我想做的是严格意义上模仿生物神经网络，而不是使用矩阵计算。

补充：

根据答案修改后的新版 partially_connect 函数：

def partially_connect(self, sample_size=100):
    """Randomly connect this neuron to at most `sample_size` other neurons.

    Does nothing when the neuron already has connections. Otherwise a random
    sample of the registered neurons is drawn, this neuron itself is skipped,
    and every remaining neuron is stored in `self.connections` under its id()
    with a uniform random weight in [0.1, 1.0] rounded to two decimals. A
    short summary is printed afterwards.

    `sample_size` defaults to 100, the value that used to be hard-coded.
    """
    if len(self.connections) == 0:
        population = Neuron.neurons
        # random.sample raises ValueError when asked for more items than the
        # population holds, so cap the request for small networks.
        elected = random.sample(population, min(sample_size, len(population)))
        for neuron in elected:
            if neuron is not self:
                self.connections[id(neuron)] = round(random.uniform(0.1, 1.0), 2)
        # Single-argument print(...) produces the same output under Python 2
        # and is also valid Python 3.
        print("Neuron ID: " + str(id(self)))
        print("    Potential: " + str(self.potential))
        print("    Error: " + str(self.error))
        print("    Connections: " + str(len(self.connections)))

性能显着提高。

Answer 1

看看这段代码：

# Original version from the question, reproduced unchanged for discussion.
def partially_connect(self):
    if len(self.connections) == 0:
        neuron_count = len(Neuron.neurons)
        # len(self.connections) is 0 inside this branch, so the slice below is
        # Neuron.neurons[0:], which builds a new list on every call.
        for neuron in Neuron.neurons[len(self.connections):]:
            if id(neuron) != id(self):
                # Link only when the draw from 1..neuron_count/100 equals 1.
                if random.randint(1,neuron_count/100) == 1:
                    self.connections[id(neuron)] = round(random.uniform(0.1, 1.0), 2)

根据你对我对 OP 的评论的回复，这里有几件事：

当您使用像 L[0:] 这样的语法时，您正在复制列表。每次调用您的函数时，切片语法都会为 Neuron.neurons 数组制作一份浅拷贝。这是一个 O(n) 操作，而且由于 build_connections 函数会为每个神经元调用一次 partially_connect，整体就变成了 O(n²)。（糟糕！）
您在 Python 中所做的这些工作可以而且应该交给库来完成（我们希望库是用 C 实现的！）。例如，可以看看 random.paretovariate() 和 random.sample() 函数。您可以轻松计算 num_connections = int(random.paretovariate(1.0) * 100)（random.sample 的数量参数必须是整数），然后写 connected_nodes = random.sample(neurons, num_connections)。再从 connected_nodes 中过滤掉 self 就完成了。

我认为您可以通过消除 n² 行为和使用内置库例程来大幅提升性能。

补充

针对您的补充，请考虑下面的写法：

def partially_connect(self, sample_size=100):
    """Randomly connect this neuron to at most `sample_size` other neurons.

    Does nothing when the neuron already has connections. Each selected neuron
    is stored in `self.connections` under its id() with a uniform random
    weight in [0.1, 1.0] rounded to two decimals.

    `sample_size` defaults to 100, the value that used to be hard-coded.
    """
    if len(self.connections) == 0:
        population = Neuron.neurons
        # random.sample raises ValueError when asked for more items than the
        # population holds, so cap the request for small networks.
        elected = random.sample(population, min(sample_size, len(population)))

        for neuron in elected:
            # Identity check replaces the remove()/except ValueError dance:
            # a neuron never connects to itself.
            if neuron is self:
                continue
            self.connections[id(neuron)] = round(random.uniform(0.1, 1.0), 2)

（我暂时省略了那些 print 语句。）

我不知道如何在不迭代所有神经元寻找 id() 值匹配的情况下从一个神经元与其连接的神经元进行通信。我建议您将对连接对象的引用存储为键，并使用权重作为值：

# Dict comprehension: the connected neuron object itself is the key and the
# connection weight is the value.
self.connections = {n: round(random.uniform(0.1, 1.0), 2) for n in elected}

当然，这假定您需要遍历从源到目标的链接。

至于线程方案，我没有好的建议。通过谷歌搜索，我找到了一些旧的邮件讨论串（英文也叫 thread，嘿！），其中提到 405 和 254 等数字是线程创建数量的上限。我没有看到任何文档说 "Python threading is now UNLIMITED!" 之类的话，所以我猜您将不得不改变实现方案的方式。

Answer 2

在 Julia 中，如果性能很重要：不要使用全局变量（参见您的 neurons 数组），也不要使用无类型数组（同样参见您的 neurons 数组）。请参见官方文档中的 Performance Tips。您还应该进行性能分析（profiling），以确定瓶颈在哪里。我强烈建议先去掉 @parallel，等代码本身足够快之后再考虑并行。

我自己看了看，除了这些我还发现了一些令人吃惊的瓶颈：

rand(1:neuron_count/100) 创建的是浮点范围，而不是整数范围。这是一个巨大的瓶颈，性能分析立即就能识别出来。请改用 rand(1:neuron_count÷100)。
最好不要调用object_id，直接使用!(neuron === self)。或者甚至更好，将 neurons 作为数组和要修改的条目的整数索引传递。

修复这些问题后（并去掉不太可能有帮助的 @parallel、注释掉文本输出），我设法将您的程序的执行时间从大约 140 秒缩短到了 4 秒。现在几乎所有的运行时间都花在生成随机数上；您可以一次性生成一大批随机数，而不是一个一个地生成，从而进一步加速这一过程。

这使用 ProgressMeter 包（您必须安装）来显示进度。

using ProgressMeter

# A single network node. Instances are created by Build() with an empty
# connection table and both numeric fields set to 0.0.
type Neuron
    # object_id of a connected neuron => weight of that connection
    connections::Dict{UInt64,Float16}
    potential::Float16
    error::Float16
end

# Link `self` to every other element of `neurons`. Each link is stored in
# `self.connections` under the target's object_id with a random weight
# rand(1:100)/100.
function fully_connect(self, neurons)
    for neuron in neurons
        # Identity test with `===`, the same check partially_connect uses,
        # instead of computing and comparing two object_id values.
        neuron === self && continue
        self.connections[object_id(neuron)] = rand(1:100)/100
    end
end

# Randomly link `self` to a subset of `neurons`. A neuron that already has
# connections is left untouched. Each link is stored in `self.connections`
# under the target's object_id with a random weight rand(1:100)/100.
function partially_connect(self, neurons)
    isempty(self.connections) || return nothing
    # A link is made when a draw from this range equals 1. The range is the
    # same for every iteration, so build it once. `max(1, ...)` keeps it
    # non-empty when there are fewer than 100 neurons: `1:0` is empty and
    # `rand` cannot draw from an empty range.
    draw_range = 1:max(1, length(neurons) ÷ 100)
    for neuron in neurons
        # A neuron never connects to itself.
        neuron === self && continue
        if rand(draw_range) == 1
            self.connections[object_id(neuron)] = rand(1:100)/100
        end
    end
    # Uncomment to print a per-neuron summary:
#         println("Neuron ID: ",object_id(self))
#         println("    Potential: ",self.potential)
#         println("    Error: ",self.error)
#         println("    Connections: ",length(self.connections))
    return nothing
end

# Create `n` neurons (10000 by default), randomly connect each of them with
# partially_connect while showing a progress bar, and return the array.
function Build(n = 10000)
    # The empty Dict is created with the key/value types of the `connections`
    # field, so no conversion from an untyped Dict is needed per neuron.
    neurons = [Neuron(Dict{UInt64,Float16}(),0.0,0.0) for i = 1:n]
    println(length(neurons), " neurons created.")
    @showprogress 1 "Connecting neurons..." for neuron in neurons
        partially_connect(neuron, neurons)
    end
    neurons
end

# Script entry point: build the default-sized network.
neurons = Build()

创建大量对象（神经元）并使用字典随机连接

Creating a huge amount of objects(neuron) and connecting randomly using dictionaries

python

multithreading

perceptron

neural-network

julia