有没有比这更好的并行编程方式?

Is there a better way to do parallel programming than this?

我编写这个脚本是为了从 Instagram 获取 "influencers"(网红)的关注者数量。

程序输出的 "runtime" 在 550-750 毫秒之间。这并不算太糟,但我想知道它是否还能更好(我是一个 Go 新手,只学了 3 周)。

package main

import (
    "encoding/json"
    "fmt"
    "io/ioutil"
    "log"
    "net/http"
    "sync"
    "time"
)

// The types below mirror the only part of the profile JSON this program
// reads: {"user": {"followed_by": {"count": N}}}.
type (
    // user is the top-level document; it wraps the "user" object.
    user struct {
        User userData `json:"user"`
    }

    // userData is the "user" object; only "followed_by" is decoded.
    userData struct {
        Followers count `json:"followed_by"`
    }

    // count is the "followed_by" object holding the follower total.
    count struct {
        Count int `json:"count"`
    }
)

// getFollowerCount starts one worker goroutine that reads usernames from in,
// looks up the follower count of each and sends it on the returned channel.
// The returned channel is closed once in has been closed and drained.
//
// A username whose lookup fails (request error, non-200 status, unreadable or
// malformed body) is reported on stdout and skipped: it produces no value on
// the output channel instead of a misleading count of 0.
func getFollowerCount(in <-chan string) <-chan int {
    out := make(chan int)
    go func() {
        defer close(out)
        for un := range in {
            n, err := fetchFollowerCount(un)
            if err != nil {
                fmt.Println(err)
                continue
            }
            out <- n
        }
    }()
    return out
}

// fetchFollowerCount performs a single lookup for un and returns the decoded
// follower count. It is a separate function so that the deferred Body.Close
// runs after every request rather than accumulating until the worker exits.
func fetchFollowerCount(un string) (int, error) {
    resp, err := http.Get("https://www.instagram.com/" + un + "/?__a=1")
    if err != nil {
        // resp is nil here; it must not be touched.
        return 0, err
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return 0, fmt.Errorf("%s: unexpected HTTP status %s", un, resp.Status)
    }
    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        return 0, fmt.Errorf("%s: reading response: %v", un, err)
    }
    var u user
    if err := json.Unmarshal(body, &u); err != nil {
        return 0, fmt.Errorf("%s: decoding response: %v", un, err)
    }
    return u.User.Followers.Count, nil
}

// merge fans the given channels into a single channel. One forwarding
// goroutine per input copies every value across; the returned channel is
// closed after all inputs have been closed and drained. Values from different
// inputs arrive in no particular order.
func merge(cs ...<-chan int) <-chan int {
    merged := make(chan int)

    var pending sync.WaitGroup
    pending.Add(len(cs))
    for _, src := range cs {
        go func(src <-chan int) {
            defer pending.Done()
            for v := range src {
                merged <- v
            }
        }(src)
    }

    // Closing must wait for every forwarder, otherwise a late send would hit
    // a closed channel.
    go func() {
        pending.Wait()
        close(merged)
    }()
    return merged
}

// gen returns an unbuffered channel that yields the given usernames in
// argument order and is closed after the last one has been received.
func gen(users ...string) <-chan string {
    names := make(chan string)
    go func() {
        defer close(names)
        for i := 0; i < len(users); i++ {
            names <- users[i]
        }
    }()
    return names
}

// main looks up the follower count of each hard-coded account with a fixed
// pool of workers sharing one input channel, prints every count as it arrives
// (arrival order is not deterministic) and logs the elapsed wall-clock time.
func main() {
    // numWorkers is the number of getFollowerCount workers draining the
    // shared username channel concurrently.
    const numWorkers = 10

    start := time.Now()
    fmt.Println("STARTING UP")
    usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}
    in := gen(usrs...)

    // Every worker receives from the same channel, so each username is
    // handled by exactly one of them.
    workers := make([]<-chan int, numWorkers)
    for i := range workers {
        workers[i] = getFollowerCount(in)
    }

    for d := range merge(workers...) {
        fmt.Println(d)
    }

    elapsed := time.Since(start)
    log.Println("runtime", elapsed)
}

欢迎使用Go,学习愉快。

你做得很好,不过你的程序还可以从很多方面改进(例如使用 JSON 解码器 json.Decoder、减少 channel 的数量等)。以下是其中一种方法。执行时间在 352-446 毫秒之间(这个数字仅供参考,因为代码中涉及网络调用,实际耗时会随服务器响应时间而变化)。

您的更新代码:

package main

import (
    "encoding/json"
    "fmt"
    "log"
    "net/http"
    "sync"
    "time"
)

// count is the innermost JSON object; "count" holds the follower total.
type count struct {
    Count int `json:"count"`
}

// userData is the "user" object; only its "followed_by" member is decoded.
type userData struct {
    Followers count `json:"followed_by"`
}

// user is the top-level document returned for a profile lookup:
// {"user": {"followed_by": {"count": N}}}.
type user struct {
    User userData `json:"user"`
}

// getFollowerCount looks up the follower count of username and sends it on
// result. It always calls wg.Done before returning.
//
// On any failure (request error, non-200 status, undecodable body) the
// problem is logged and nothing is sent on result.
func getFollowerCount(username string, result chan<- int, wg *sync.WaitGroup) {
    defer wg.Done()
    reqURL := "https://www.instagram.com/" + username + "/?__a=1"
    resp, err := http.Get(reqURL)
    if err != nil {
        log.Println(err)
        return
    }
    defer resp.Body.Close()

    // An error response must not be decoded: its body is not the profile
    // document, and decoding it could yield a meaningless count.
    if resp.StatusCode != http.StatusOK {
        log.Printf("%s: unexpected HTTP status %s", username, resp.Status)
        return
    }

    var u user
    if err := json.NewDecoder(resp.Body).Decode(&u); err != nil {
        log.Println(err)
        return
    }
    result <- u.User.Followers.Count
}

// execute launches one getFollowerCount goroutine per username, blocks until
// all of them have finished, and then sends the sentinel -1 on result to tell
// the receiver that no more counts will follow. It does not close result.
func execute(users []string, result chan<- int) {
    var pending sync.WaitGroup
    pending.Add(len(users))
    for i := range users {
        go getFollowerCount(users[i], result, &pending)
    }
    pending.Wait()

    const done = -1
    result <- done
}

// main runs execute in the background for the hard-coded accounts, prints
// each follower count as it is received, stops at the -1 sentinel (or if the
// channel is closed), and finally prints the elapsed wall-clock time.
func main() {
    began := time.Now()
    fmt.Println("STARTING UP")
    usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}

    result := make(chan int)
    go execute(usrs, result)

    for {
        v, open := <-result
        if !open || v == -1 {
            break
        }
        fmt.Println(v)
    }

    fmt.Println("runtime:", time.Since(began))
}

我同意 jeevatkm,有很多方法可以实现你的任务并改进它。一些注意事项:

  1. 将实际执行工作的功能(即从远程服务获取结果)与负责协调所有工作的功能分开。
  2. 最好将 error 传播给调用者,而不是在被调用的函数内部直接消化(处理)它。
  3. 由于各个任务是并行执行的,结果可能以不确定的顺序返回。因此,除了关注者数量之外,结果还应包含其他相关信息(例如用户名和错误)。

以下实现可能是一种替代方法:

package main

import (
    "encoding/json"
    "errors"
    "fmt"
    "net/http"
    "sync"
    "time"
)

// JSON shapes for the profile lookup response; only the path
// user -> followed_by -> count is decoded.
type (
    // user is the outermost JSON object.
    user struct {
        User userData `json:"user"`
    }

    // userData holds the "followed_by" member of the "user" object.
    userData struct {
        Followers count `json:"followed_by"`
    }

    // count holds the numeric "count" member, i.e. the follower total.
    count struct {
        Count int `json:"count"`
    }
)

type (
    // follower is the outcome of one lookup. Results arrive in no fixed
    // order, so each one carries the username it belongs to alongside the
    // count and any error.
    follower struct {
        Username string
        Count    int
        Error    error
    }

    // GetFollowerCountFunc fetches the follower count of a single user,
    // identified by username.
    GetFollowerCountFunc func(string) (int, error)
)

// mockGetFollowerCountFor is a network-free stand-in for testing: usernames
// of at least 9 bytes succeed with a count of 10, shorter ones fail with -1
// and an error.
func mockGetFollowerCountFor(userName string) (int, error) {
    const minLen = 9
    if len(userName) >= minLen {
        return 10, nil
    }
    return -1, errors.New("mocking error in get follower count")
}

// getFollowerCountFor fetches the follower count of userName from the remote
// service. It only does the lookup; errors are returned to the caller rather
// than handled here.
//
// On failure it returns -1 and a non-nil error. Failures are: the request
// could not be made, the server answered with a status other than 200, or the
// body could not be decoded as JSON.
func getFollowerCountFor(userName string) (int, error) {
    URL := "https://www.instagram.com/" + userName + "/?__a=1"
    resp, err := http.Get(URL)
    if err != nil {
        return -1, err
    }
    defer resp.Body.Close()

    // An error response does not carry the profile document; decoding it
    // could report a meaningless count with no error.
    if resp.StatusCode != http.StatusOK {
        return -1, fmt.Errorf("get follower count for %q: unexpected HTTP status %s", userName, resp.Status)
    }

    var u user
    if err := json.NewDecoder(resp.Body).Decode(&u); err != nil {
        return -1, err
    }
    return u.User.Followers.Count, nil
}

// getFollowersAsync coordinates the lookups: it calls fn once per entry of
// users, each call in its own goroutine, and returns immediately with a
// channel on which one follower value per user is delivered.
//
// The channel is buffered to len(users), so no worker blocks on sending even
// if the caller reads slowly; the buffer size does not limit how many
// workers run at once. A failed lookup is delivered with Count -1 and the
// error from fn. The channel is closed after every worker has sent its
// result, so the caller can simply range over it.
func getFollowersAsync(users []string, fn GetFollowerCountFunc) <-chan follower {
    out := make(chan follower, len(users))

    // lookup runs fn for one user, publishes the outcome and signals done.
    lookup := func(name string, done func()) {
        defer done()
        res := follower{Username: name}
        if res.Count, res.Error = fn(name); res.Error != nil {
            res.Count = -1
        }
        out <- res
    }

    // Fan out and wait in the background so this function does not block.
    go func() {
        var pending sync.WaitGroup
        pending.Add(len(users))
        for i := range users {
            go lookup(users[i], pending.Done)
        }
        pending.Wait()
        close(out)
    }()

    return out
}

// main looks up the follower count of each hard-coded account concurrently,
// prints one line per account (a count or an error) in arrival order, and
// then prints the elapsed wall-clock time.
func main() {
    start := time.Now()
    fmt.Println("STARTING UP")
    usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}

    results := getFollowersAsync(usrs, getFollowerCountFor)
    //For TESTING:
    //results := getFollowersAsync(usrs, mockGetFollowerCountFor)
    for r := range results {
        if r.Error != nil {
            // The trailing newline keeps each error on its own line instead
            // of running into the next result.
            fmt.Printf("Error for user '%s' => %v\n", r.Username, r.Error)
            continue
        }
        fmt.Printf("%s: %d\n", r.Username, r.Count)
    }

    elapsed := time.Since(start)
    fmt.Println("runtime", elapsed)
}