पेश है गो, भाग 2: एक बाल कलाकार और विकृतियों के साथ एक इमेज ग्रैबर लिखना

कार्य


рд╣рд╛рд▓ рд╣реА рдореЗрдВ, рдореИрдВрдиреЗ рдмрд╛рдд рдХреА рдХрд┐ рдХреИрд╕реЗ рдореИрдВрдиреЗ рдПрдХ рдЧреБрдкреНрдд рдорд┐рд╢рди рдХреЛ рдЕрдВрдЬрд╛рдо рджрд┐рдпрд╛ рдФрд░ рдЧреЛ рдХреА рдорджрдж рд╕реЗ рдореИрдВрдиреЗ "рд╢реВрдиреНрдпрддрд╛" рд╕реЗ рдПрдХ рдЙрджреНрдзрд░рдг рдбрдВрдк рдбрд╛рдЙрдирд▓реЛрдб рдХрд┐рдпрд╛ ред рдлрд┐рд░ рд╕реЗ рд▓рдбрд╝рд╛рдИ рдореЗрдВ рд╢рд╛рдорд┐рд▓ рд╣реЛрдиреЗ рдХрд╛ рд╕рдордп рдЖ рдЧрдпрд╛ рд╣реИ, рдЗрд╕ рдмрд╛рд░ рдпрд╣ рдЕрддрд╛рдд рдХреА рдЪрд┐рдВрддрд╛ рдХрд░рддрд╛ рд╣реИ, рдФрд░ рди рдХреЗрд╡рд▓ рдЗрд╕рд▓рд┐рдП рдХрд┐ рдпрд╣ рд╢реВрдиреНрдпрддрд╛ рдХреЗ рд╕рд╛рде рдЧрд╛рдпрд╛ рдЬрд╛рддрд╛ рд╣реИред

рдЬреЛ рд▓реЛрдЧ рдЪреВрдХ рдЧрдП рдереЗ - рдПрдХ рд╕рдордп рдореЗрдВ "рдерд┐рдореЗрдЯрд┐рдХ рдореАрдбрд┐рдпрд╛" (рд╣рд╛рдБ, рд╣рдм рдХреЗ рдорд╛рд▓рд┐рдХ) рдиреЗ рдкреНрд░реЛрдЬреЗрдХреНрдЯ "рд░рд┐рд╕реНрдкреЗрдХреНрдЯ" (рдПрдХ рд╕реЗрдХреНрд╢рди рдХреЗ рд╕рд╛рде рдЧреНрд▓реИрдорд░рд╕ рдЗрдореЗрдЬрдмреЛрд░реНрдб / рдЯреЙрдЧрд▓ рд╕реНрд╡рд┐рдЪ рдХрд╛ рдПрдХ рдкреНрд░рдХрд╛рд░) рд▓реЙрдиреНрдЪ рдХрд┐рдпрд╛ рдерд╛, рд╕рд░рд▓ рд▓рдбрд╝рдХрд┐рдпреЛрдВ рдФрд░ рдкреНрд░реЛрдЧреНрд░рд╛рдорд░реНрд╕ рдХреЛ рдмреИрдирд░реЛрдВ рдХрд╛ рдЗрдВрддрдЬрд╛рд░ рдХрд░рдХреЗ рд╡рд╣рд╛рдБ рдлреБрд╕рд▓рд╛рдпрд╛ рдЧрдпрд╛ рдерд╛ рдФрд░ рд╕реНрдкрд╖реНрдЯ рд░реВрдк рд╕реЗ рдЙрдореНрдореАрдж рдереА рдХрд┐ рдПрдХ рд╕рд╛рде рдмрдврд╝реЗрдВред рдпрд╣ рдПрдХ рд╕рд╛рде рд╡рд┐рдХрд╕рд┐рдд рдирд╣реАрдВ рд╣реБрдЖ, рдФрд░ рдереЛрдбрд╝реА рджреЗрд░ рдХреЗ рдмрд╛рдж рд░реЗрд╕реНрдкреЗрдХреНрдЯрд╛ рдХреЛ рдПрдЯреИрдЯ рдореЗрдВ рдЙрддреНрдкрд░рд┐рд╡рд░реНрддрд┐рдд рдХрд┐рдпрд╛ рдЧрдпрд╛, рдЬрд┐рд╕рдореЗрдВ рдкрд╣рд▓реЗ рд╕реЗ рд╣реА рдХрд╕реНрдЯрдо рдмреЛрд░реНрдб рд╕реНрдЯреНрд░реАрдо рдмрдирд╛рдиреЗ рдХреЗ рд▓рд┐рдП рдХрд╛рд░реНрдпрдХреНрд╖рдорддрд╛ рдереА, рд╡рд┐рд╖рдпрдЧрдд рдзрд╛рд░рд╛рдУрдВ рдХреЗ рд╕рдВрдпреЛрдЬрди рдХреЗ рд▓рд┐рдП рд╣рдм, рдФрд░ рдЗрд╕реА рддрд░рд╣ред рдХреБрдЫ рд╕рдордп рдХреЗ рд▓рд┐рдП рдкрд░рд┐рдпреЛрдЬрдирд╛ рд╡рд┐рдХрд╕рд┐рдд рд╣реБрдИ, рд▓реЗрдХрд┐рди рд▓рдбрд╝рдХрд┐рдпреЛрдВ рдиреЗ рдЫреЛрдбрд╝ рджрд┐рдпрд╛, рдкреНрд░реЛрдЧреНрд░рд╛рдорд░ рдмрдиреЗ рд░рд╣реЗ, рдФрд░ рдЙрдирдореЗрдВ рд╕реЗ рдмрд╣реБрдд рдХрдо рдереЗред рдЕрдм рдЗрд╕ рдЬрдЧрд╣ рдХреЛ рд▓рдЧрднрдЧ рдЫреЛрдбрд╝ рджрд┐рдпрд╛ рдЧрдпрд╛ рд╣реИ, рдкреНрд▓реЗрдЧ рд╣рд╡рд╛рдПрдВ, рдкрд░рд┐рддреНрдпрдХреНрдд рд░реЗрд▓рдЧрд╛рдбрд╝рд┐рдпрд╛рдВ, рдЕрдВрдзреЗрд░рд╛ рд╣реИ рдФрд░ рдХрдИ рдЬреАрд╡рд┐рдд рдмрдЪреЗ рдкреБрд░рд╛рдиреЗ рдЬрд╛рдирд╡рд░реЛрдВ рдХреА рдХреЗрд╡рд▓ рджреБрд░реНрд▓рдн рд░реЛрдПрдВ рдХрднреА-рдХрднреА рд░рд╛рдд рдореЗрдВ html рдХреЗ рдорд╛рдзреНрдпрдо рд╕реЗ рдХрдЯ рдЬрд╛рддреА рд╣реИрдВред

рдФрд░ рдареАрдХ рд╣реИ, рдЙрд╕рдХреЗ рд╕рд╛рде рдпрд╣ рдЕрдЯрд╛рдЯ, рдЗрд╕ рдЕрдЯрд╛рдЯ рдХреЗ рд╕рд╛рдеред рд▓реЗрдХрд┐рди рддрд╕реНрд╡реАрд░реЛрдВ рдХреЗ рд░реВрдк рдореЗрдВ рдЙрддреНрдХреГрд╖реНрдЯ рд╕рд╛рдордЧреНрд░реА рдХрд╛ рдПрдХ рдмрд╣реБрдд рдХреБрдЫ рд╣реИ, рдЗрд╕рдХреЗ рд▓рд╛рдпрдХ рд╡реЙрд▓рдкреЗрдкрд░ рдХреЗ рд╕рд╛рде рдПрдХ рдзрд╛рдЧрд╛! рдЗрд╕рд▓рд┐рдП, рдпрд╣ рдЖрд╢реНрдЪрд░реНрдпрдЬрдирдХ рдирд╣реАрдВ рд╣реИ рдХрд┐ рдореБрдЦреНрдпрд╛рд▓рдп рд╕реЗ рдирд┐рдореНрди рдорд┐рд╢рди рдирд┐рдореНрдирд╛рдиреБрд╕рд╛рд░ рджреЗрдЦрд╛ рдЧрдпрд╛ рд╣реИ:
“तत्काल # 949 # 291 धारा से सभी छवियों को बचाने के लिए। ps। विकृत हो गया। ”

рдЦреИрд░, рдПрдХ рдХрд╛рд░реНрдп рд╣реИ - рдЖрдкрдХреЛ рдХрд╛рдо рдХрд░рдиреЗ рдХреА рдЖрд╡рд╢реНрдпрдХрддрд╛ рд╣реИред рд╡рд╛рд╕реНрддрд╡ рдореЗрдВ, рд╣рдорд╛рд░реЗ рдкрд╛рд╕ рдкрд╣рд▓реЗ рд╕реЗ рд╣реА рд╡рд┐рдХреГрдд рд╕рдВрд░рдЪрдирд╛рдУрдВ рдореЗрдВ рдХреБрдЫ рдкреНрд░рдХрд╛рд░ рдХреЗ рдХреЙрдлреА рдЪреИрдВрдкрд┐рдпрди рд╣реИрдВ, рд▓реЗрдХрд┐рди рдЖрдк рдЧреЛ рдкрд░ рдХреБрдЫ рднреА рд╕рдордЭ рд╕рдХрддреЗ рд╣реИрдВ! рдкреЗрдВрдЯ рдореЗрдВ 9 рдШрдВрдЯреЗ рдХреЗ рдмрд╛рдж, рдореЗрд░реЗ рдкрд╛рд╕ рдирд┐рдореНрдирд▓рд┐рдЦрд┐рдд рд╣рдорд▓реЗ рдХреА рдпреЛрдЬрдирд╛ рдереА:



рд╡рд┐рдХреГрддрд┐ рдХреА рдбрд┐рдЧреНрд░реА рдЕрджреНрднреБрдд рд╣реИ, рдЕрдзрд┐рдХрд╛рд░рд┐рдпреЛрдВ рдХреЛ рдХрд╛рдлреА рд╣реЛрдЧрд╛! рдЖрдЗрдП рд╡рд┐рдЪрд╛рд░ рдХрд░реЗрдВ рдХрд┐ рдпрд╣рд╛рдВ рдХреНрдпрд╛ рд╣реИ рдФрд░ рдХреИрд╕реЗ:

"рдЬреЗрдирд░реЗрдЯрд░" HTML рдлрд╝рд╛рдЗрд▓ рд╕реЗ рдкреЗрдЬ рдХреЛрдб рдбрд╛рдЙрдирд▓реЛрдб рдХрд░реЗрдЧрд╛, рдЪрд┐рддреНрд░реЛрдВ рдХреЗ url рдХреЛ рдЦреАрдВрдЪреЗрдЧрд╛ рдФрд░ рдЙрдиреНрд╣реЗрдВ "Balancer" рдореЗрдВ рд╕реНрдерд╛рдирд╛рдВрддрд░рд┐рдд рдХрд░ рджреЗрдЧрд╛, рдЬреЛ рдЗрди urls рдХреЛ "рд╡рд░реНрдХрд░реНрд╕" рдХреА рдПрдХ рдирд┐рд╢реНрдЪрд┐рдд рд╕рдВрдЦреНрдпрд╛ рддрдХ рдкрд╣реБрдВрдЪрд╛рддрд╛ рд╣реИ (рдЬрд┐рдирдореЗрдВ рд╕реЗ рдкреНрд░рддреНрдпреЗрдХ рдХреА рдЕрдкрдиреА рдЫреЛрдЯреА рд╕реА рдХрддрд╛рд░ рдХрддрд╛рд░ рд╣реИ), рдпрд╣ рд╕реБрдирд┐рд╢реНрдЪрд┐рдд рдХрд░рддреЗ рд╣реБрдП рдХрд┐ рд╕рднреА рдХрд╛рд░реНрдпрдХрд░реНрддрд╛ рд╕рдорд╛рди рд░реВрдк рд╕реЗ рд▓реЛрдб рдХрд┐рдП рдЧрдП рд╣реИрдВ ред рдЦреИрд░, "рдХрд╛рд░реНрдпрдХрд░реНрддрд╛" рдЪрд┐рддреНрд░ рдЕрдкрд▓реЛрдб рдХрд░рддреЗ рд╣реИрдВ рдФрд░ рдПрдХ рдЫреБрдЖ рдмрд╛рд▓реНрдХрди рдХреА рдЖрдВрдЦ рдХреЛ рдкреНрд░рд╕рдиреНрди рдХрд░рддреЗ рд╣реИрдВред рдпрд╣ рдмреЗрдорд╛рдиреА рд▓рдЧ рд░рд╣рд╛ рд╣реИ, рд▓реЗрдХрд┐рди рдпреБрджреНрдз рдХреЗ рдмрд╛рд░реЗ рдореЗрдВ! рдЖрдЗрдП рдЬрд╛рдиреЗ:

जनक


рдпрд╣ рд╕рдмрд╕реЗ рдЖрд╕рд╛рди рд╣реЛрдЧрд╛, рдПрдХ рдЪрдХреНрд░ рдореЗрдВ рдЬрдм рд╣рдо рдкреГрд╖реНрдареЛрдВ рдХреЛ рд▓реЛрдб рдХрд░рддреЗ рд╣реИрдВ, рдЙрдиреНрд╣реЗрдВ рдкрд╛рд░реНрд╕ рдХрд░рддреЗ рд╣реИрдВ рдФрд░ рдПрдХ рдЪреИрдирд▓ рдореЗрдВ рдЪрд┐рддреНрд░реЛрдВ рдХреЗ рд▓рдореНрдмреА рдпреВрдЖрд░рдПрд▓ рдХреЛ рдлреАрдб рдХрд░рддреЗ рд╣реИрдВ (рдореИрдВрдиреЗ рдкрд┐рдЫрд▓реЗ рд▓реЗрдЦ рдореЗрдВ рдЪреИрдирд▓, рд▓реМрдХреА рдФрд░ рдХреБрдЫ рдЕрдиреНрдп рдЪреАрдЬреЛрдВ рдХреЗ рдмрд╛рд░реЗ рдореЗрдВ рд╕рддрд╣реА рд░реВрдк рд╕реЗ рдмрд╛рдд рдХреА рдереА, рдЗрд╕рд▓рд┐рдП рдореИрдВрдиреЗ рдпрд╣рд╛рдВ рджреЛрд╣рд░рд╛ рдирд╣реАрдВ рдХрд┐рдпрд╛)ред
Atat рдХреЛ рд╡рд░реНрдЧреЛрдВ рдореЗрдВ рд╡рд┐рднрд╛рдЬрд┐рдд рдХрд┐рдпрд╛ рдЬрд╛рддрд╛ рд╣реИ (рдЬрд┐рд╕реЗ "рд╕реНрдЯреНрд░реАрдо" рдХрд╣рд╛ рдЬрд╛рддрд╛ рд╣реИ), рдкреНрд░рддреНрдпреЗрдХ "рд╕реНрдЯреНрд░реАрдо" рдХреЗ рдПрдХ рдкреГрд╖реНрда рдореЗрдВ 20 рдкреЛрд╕реНрдЯ рд╣реЛрддреЗ рд╣реИрдВред рд▓рд┐рдВрдХ рдЗрд╕ рддрд░рд╣ рджрд┐рдЦрддреЗ рд╣реИрдВ: " home.atata.com/streams/291?order=date&from=40 ", рдЬрд╣рд╛рдВ 291 рдкреНрд░рд╡рд╛рд╣ рдЖрдИрдбреА рд╣реИ рдФрд░ 40 рдЕрдВрдд рд╕реЗ рдкрджреЛрдВ рдореЗрдВ рдЗрдВрдбреЗрдВрдЯреЗрд╢рди рд╣реИ (рдЕрд░реНрдерд╛рдд, рдкрд╣рд▓реЗ рдЬрд┐рддрдирд╛ рдЕрдзрд┐рдХ)ред рдкреЛрд╕реНрдЯ рдЯреЗрдореНрдкрд▓реЗрдЯ рдХреЛрдб рдЬрд┐рд╕реЗ рд╣рдо рдЗрд╕ рддрд░рд╣ рд╕реЗ рджреЗрдЦрдирд╛ рдЪрд╛рд╣рддреЗ рд╣реИрдВ:

<figure> <figcaption></figcaption> <a class='image' href=''> <img class='thumb' rel='' src='' /> </a> </figure> 

рд╣рдо url рдореЗрдВ " рд╕реЗ " рдмрдврд╝рддреЗ рдкреГрд╖реНрдареЛрдВ рдХреЗ рдорд╛рдзреНрдпрдо рд╕реЗ рдЬрд╛рдПрдВрдЧреЗ, рдЫрд╡рд┐ рд╡рд░реНрдЧ рдХреЗ рд╕рд╛рде рд▓рд┐рдВрдХ рдЦреАрдВрдЪреЗрдВрдЧреЗ рдФрд░ рдЙрдиреНрд╣реЗрдВ рдЪреИрдирд▓ рдкрд░ рднреЗрдЬреЗрдВрдЧреЗред рд▓реЗрдХрд┐рди рдХрдм рддрдХ? рдпрд╣ рдкрддрд╛ рдЪрд▓рддрд╛ рд╣реИ рдХрд┐ рдзрд╛рд░рд╛ рдХреЗ рдЕрдВрддрд┐рдо рдкреГрд╖реНрда рдкрд░ рдЕрдЧрд▓реЗ рдмреИрдЪ рдХреЗ рдкрджреЛрдВ рдХреЗ рд▓рд┐рдП рдПрдХ рдЫрд┐рдкреА рдбрд╛рдЙрдирд▓реЛрдб рдмрдЯрди рдХреЗ рд░реВрдк рдореЗрдВ рдПрдХ рдкрд╣рдЪрд╛рди рдЪрд┐рд╣реНрди рд╣реИ (
 ),      .                 .  ,  ,         ,        . 

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// Worker holds the state of one download goroutine.
type Worker struct {
	urls    chan string     // URLs queued for this worker; read by work
	pending int             // jobs handed to this worker and not yet reported done
	index   int             // position of this worker inside the Pool heap
	wg      *sync.WaitGroup // counts downloads in progress; Add/Done are called around each download
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main

import "fmt"

// Animal is satisfied by any type that has a Say method returning a string.
// No explicit "implements" declaration is needed.
type Animal interface {
	Say() string
}

// Cat is an Animal with no state.
type Cat struct{}

// Say returns the cat's fixed phrase.
func (c Cat) Say() string {
	return "!"
}

// Parrot is an Animal that introduces itself by name.
type Parrot struct {
	name string
}

// Say returns the parrot's name followed by its phrase.
func (p Parrot) Say() string {
	return p.name + " !"
}

func main() {
	kitty := Cat{}
	popka := Parrot{name: ""}

	// Both concrete types fit in a slice of the interface type.
	animals := []Animal{kitty, popka}
	for _, animal := range animals {
		fmt.Println(animal.Say())
	}
}
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

// Interface is the contract container/heap expects, quoted from the package
// documentation: everything sort.Interface requires, plus Push and Pop.
type Interface interface {
	sort.Interface
	Push(x interface{}) // add x as element Len()
	Pop() interface{}   // remove and return element Len() - 1.
}
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

// Interface is the contract of the sort package, quoted from its
// documentation.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var (
	WORKERS    = 5 // number of worker goroutines started by Balancer.init
	WORKERSCAP = 5 // capacity of each worker's URL queue
)
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// Default settings; the command-line flags registered in init override them.
var (
	WORKERS     = 5     // number of worker goroutines
	WORKERSCAP  = 5     // capacity of each worker's URL queue
	ATATASTREAM = 291   // id of the stream to grab
	ATATAPOS    = 0     // post offset to start from
	IMGDIR      = "img" // directory the images are saved to
)

// init registers the command-line flags: -w (WORKERS), -s (ATATASTREAM),
// -p (ATATAPOS) and -d (IMGDIR). WORKERSCAP has no flag.
//
// NOTE(review): the usage strings are nearly empty in this copy of the
// source; they are kept as found.
func init() {
	flag.IntVar(&WORKERS, "w", WORKERS, " ")
	flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ")
	flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ")
	flag.StringVar(&IMGDIR, "d", IMGDIR, " ")
}
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !

), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !

), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastebin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// Worker is one download goroutine's state as seen by the balancer.
type Worker struct {
	// urls is the channel on which the worker receives URLs to download.
	urls chan string

	// pending is the number of jobs handed to the worker and not yet
	// reported as completed.
	pending int

	// index is the worker's position in the heap.
	index int

	// wg is the WaitGroup the worker marks while a download is running.
	wg *sync.WaitGroup
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main

import (
	"fmt"
)

// Animal is anything that can say something.
type Animal interface {
	Say() string
}

// Cat is an Animal with no state of its own.
type Cat struct{}

// Say returns the cat's fixed phrase.
func (Cat) Say() string {
	return "!"
}

// Parrot is an Animal that has a name.
type Parrot struct {
	name string
}

// Say returns the parrot's name followed by its phrase.
func (p Parrot) Say() string {
	return p.name + " !"
}

// main shows that values of different concrete types can be stored in a
// single []Animal and used through the interface.
func main() {
	animals := []Animal{
		Cat{},
		Parrot{name: ""},
	}
	for i := 0; i < len(animals); i++ {
		fmt.Println(animals[i].Say())
	}
}
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

// Interface is the contract of container/heap as quoted from the package
// documentation: everything sort.Interface requires, plus Push and Pop.
type Interface interface {
	sort.Interface

	// Push adds x as element Len().
	Push(x interface{})

	// Pop removes and returns element Len() - 1.
	Pop() interface{}
}
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

// Interface is the contract of the sort package as quoted from the package
// documentation.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int

	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool

	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastebin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
 ),      .                 .  ,  ,         ,        . 

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastebin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastebin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
 ),      .                 .  ,  ,         ,        . 

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastebin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastebin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
 ),      .                 .  ,  ,         ,        . 

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// Worker is one downloader. It receives URLs on its own channel and is kept
// in the balancer's pool heap, ordered by pending.
type Worker struct {
	urls    chan string     // URLs assigned to this worker
	pending int             // jobs assigned and not yet reported as completed
	index   int             // position in the pool heap, maintained by Pool's methods
	wg      *sync.WaitGroup // wait group shared with the balancer
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main

import (
	"fmt"
)

// Animal is satisfied by any type that has a Say method returning a string.
type Animal interface {
	Say() string
}

// Cat is an Animal with a fixed phrase.
type Cat struct{}

// Say returns the cat's phrase.
func (Cat) Say() string {
	return "!"
}

// Parrot is an Animal that carries a name.
type Parrot struct {
	name string
}

// Say returns the parrot's name followed by its phrase.
func (p Parrot) Say() string {
	return p.name + " !"
}

// main puts a Cat and a Parrot into one slice of Animal and prints what
// each of them says, in order.
func main() {
	var animals []Animal
	animals = append(animals, Cat{}, Parrot{name: ""})
	for i := 0; i < len(animals); i++ {
		fmt.Println(animals[i].Say())
	}
}
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

// Interface is the declaration a type has to satisfy to be used with the
// container/heap functions: sort.Interface plus Push and Pop.
type Interface interface {
	sort.Interface
	Push(x interface{}) // add x as element Len()
	Pop() interface{}   // remove and return element Len() - 1.
}
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

// Interface is the sort.Interface declaration embedded by heap.Interface.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// Balancer distributes incoming URLs among the workers in its pool and
// keeps track of how many jobs are outstanding.
type Balancer struct {
	pool     Pool            // heap of workers, ordered by their pending count
	done     chan *Worker    // workers send themselves here after finishing a job
	requests chan string     // URLs forwarded by the flow-control goroutine
	flowctrl chan bool       // PMFC ("Poor Man's Flow Control") tokens: one allows one more URL to be forwarded
	queue    int             // jobs dispatched and not yet reported as completed
	wg       *sync.WaitGroup // wait group shared with all workers
}
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
 ),      .                 .  ,  ,         ,        . 

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
 ),      .                 .  ,  ,         ,        . 

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastebin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// Worker holds the state of one downloading goroutine as tracked by the
// balancer.
type Worker struct {
	// urls is the worker's queue of URLs to download.
	urls chan string
	// pending is the number of assigned jobs not yet reported as done.
	pending int
	// index is the worker's position in the pool heap.
	index int
	// wg is the wait group shared with the balancer.
	wg *sync.WaitGroup
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main

import (
	"fmt"
)

// Animal is anything that can say something.
type Animal interface {
	Say() string
}

// Cat is an Animal without any state.
type Cat struct{}

// Say returns the cat's phrase.
func (c Cat) Say() string {
	return "!"
}

// Parrot is an Animal that has a name.
type Parrot struct {
	name string
}

// Say returns the parrot's name followed by its phrase.
func (p Parrot) Say() string {
	return p.name + " !"
}

// main prints what every animal in a mixed slice says.
func main() {
	zoo := []Animal{
		Cat{},
		Parrot{name: ""},
	}
	for i := range zoo {
		fmt.Println(zoo[i].Say())
	}
}
, container/heap ( http://golang.org/pkg/container/heap/#Interface ) :

// Interface is the method set a type needs in order to be used with the
// container/heap routines (quoted from that package).
type Interface interface {
	sort.Interface

	// Push adds x as element Len().
	Push(x interface{})

	// Pop removes and returns element Len() - 1.
	Pop() interface{}
}
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

// Interface is the method set a collection needs in order to be sorted
// (quoted from package sort).
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int

	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool

	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

// Pool sizing.
var (
	// WORKERS is the number of worker goroutines the balancer starts.
	WORKERS = 5
	// WORKERSCAP is the buffer size of each worker's URL channel.
	WORKERSCAP = 5
)
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// Defaults; all but WORKERSCAP can be overridden on the command line.
var (
	WORKERS     = 5     // number of worker goroutines
	WORKERSCAP  = 5     // buffer size of each worker's URL channel
	ATATASTREAM = 291   // id of the stream to download
	ATATAPOS    = 0     // position in the stream to start from
	IMGDIR      = "img" // directory the images are saved to
)

// init registers the command-line flags. The usage strings were blank,
// which made the generated help text useless.
func init() {
	flag.IntVar(&WORKERS, "w", WORKERS, "number of download workers")
	flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id of the stream to download")
	flag.IntVar(&ATATAPOS, "p", ATATAPOS, "position in the stream to start from")
	flag.StringVar(&IMGDIR, "d", IMGDIR, "directory to save images to")
}
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
 ),      .                 .  ,  ,         ,        . 

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
 ),      .                 .  ,  ,         ,        . 

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastebin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
 ),      .                 .  ,  ,         ,        . 

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// Worker is one downloader managed by the balancer.
type Worker struct {
	// urls is the channel the worker receives its URLs on.
	urls chan string
	// pending counts the URLs handed to the worker that it has not yet
	// reported as finished.
	pending int
	// index is the worker's position in the pool heap (-1 after Pop).
	index int
	// wg is the WaitGroup used to wait for downloads that are in flight.
	wg *sync.WaitGroup
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main

import "fmt"

// Animal is implemented by anything that can say something.
type Animal interface {
	Say() string
}

// Cat is an Animal with a fixed phrase.
type Cat struct{}

// Say returns the cat's phrase.
func (Cat) Say() string {
	return "!"
}

// Parrot is an Animal that mentions its own name.
type Parrot struct {
	name string
}

// Say returns the parrot's name followed by its phrase.
func (p Parrot) Say() string {
	phrase := p.name
	phrase += " !"
	return phrase
}

// main lets every animal speak, using only the Animal interface.
func main() {
	var animals []Animal
	animals = append(animals, Cat{}, Parrot{name: ""})
	for i := 0; i < len(animals); i++ {
		fmt.Println(animals[i].Say())
	}
}
, container/heap ( http://golang.org/pkg/container/heap/#Interface ) :

// Interface is the contract a type must satisfy to be used with the
// container/heap routines: the three sort.Interface methods plus Push and Pop.
type Interface interface {
	sort.Interface

	// Push adds x as element Len().
	Push(x interface{})

	// Pop removes and returns element Len() - 1.
	Pop() interface{}
}
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

// Interface lists the three methods a collection must provide to be sortable.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int

	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool

	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
 ),      .                 .  ,  ,         ,        . 

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
 ),      .                 .  ,  ,         ,        . 

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// Worker is a single download worker managed by the balancer.
type Worker struct {
	urls    chan string     // URLs assigned to this worker
	pending int             // number of assigned jobs not yet reported done
	index   int             // position of the worker in the Pool heap
	wg      *sync.WaitGroup // counts downloads in progress (shared with the balancer)
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main

import ("fmt")

// Animal is implemented by every type that has a Say method.
type Animal interface {
	Say() string
}

// Cat implements Animal.
type Cat struct{}

// Say returns the cat's phrase.
func (c Cat) Say() string {
	return "!"
}

// Parrot implements Animal and has a name.
type Parrot struct {
	name string
}

// Say returns the parrot's phrase, prefixed with its name.
func (p Parrot) Say() string {
	return p.name + " !"
}

func main() {
	kitty := Cat{}
	popka := Parrot{name: ""}
	// Both values satisfy Animal without declaring it explicitly.
	animals := []Animal{kitty, popka}
	for _, animal := range animals {
		fmt.Println(animal.Say())
	}
}
, container/heap ( http://golang.org/pkg/container/heap/#Interface ) :

// Interface is what a type must implement to be used with the container/heap
// functions: sort.Interface plus Push and Pop.
type Interface interface {
	sort.Interface
	Push(x interface{}) // add x as element Len()
	Pop() interface{}   // remove and return element Len() - 1.
}
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

// Interface is the set of methods a collection must provide to be ordered.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
 ),      .                 .  ,  ,         ,        . 

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
 ),      .                 .  ,  ,         ,        . 

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastebin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
 ),      .                 .  ,  ,         ,        . 

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go
package main

import (
	"fmt"
	// third-party package used to fetch and query the HTML pages
	"github.com/opesun/goquery"
	"strconv"
)

const (
	// ENDMESSAGE is what generator sends on its output channel after the
	// last image URL.
	ENDMESSAGE = "TooLateToDieYoung"
)

// generator walks the pages of the given stream, beginning at position start
// and advancing by 20 per page, and sends the URL of every image found to out.
// When a page contains an element matching "li.last.hide" it sends ENDMESSAGE
// and returns.
func generator(out chan string, stream, start int) {
	for pos := start; ; pos += 20 {
		// Fetch and parse one page of the stream.
		x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos))
		// NOTE(review): a page that fails to load is skipped without a
		// retry or a log message, and the loop has no other exit.
		if err == nil {
			// Every image link on the page becomes one download job.
			for _, url := range x.Find("figure a.image").Attrs("href") {
				out <- "http://atata.com/" + url
			}
			// Presumably this element marks the last page — confirm.
			if len(x.Find("li.last.hide")) > 0 {
				out <- ENDMESSAGE
				return
			}
		}
	}
}



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// Worker is the state of one downloader as tracked by the balancer.
type Worker struct {
	urls    chan string     // queue of URLs for this worker to download
	pending int             // jobs handed to the worker and not yet reported done
	index   int             // position in the Pool heap
	wg      *sync.WaitGroup // shared group used to wait for downloads in progress
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// work is the worker's endless main loop: it takes the next URL from w.urls,
// downloads it and reports itself on done. The shared WaitGroup is
// incremented only for the duration of the download, so waiting on it covers
// downloads in progress, not URLs still sitting in the queue.
func (w *Worker) work(done chan *Worker) {
	for {
		url := <-w.urls // blocks until a job is available
		w.wg.Add(1)     // mark a download as in progress
		download(url)
		w.wg.Done()
		done <- w // tell the receiver that one job is finished
	}
}
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main

import (
	"fmt"
)

// Animal is anything that can say something.
type Animal interface {
	Say() string
}

// Cat has no state of its own.
type Cat struct{}

// Say makes Cat satisfy Animal.
func (c Cat) Say() string {
	return "!"
}

// Parrot carries a name that it repeats when speaking.
type Parrot struct {
	name string
}

// Say makes Parrot satisfy Animal.
func (p Parrot) Say() string {
	return p.name + " !"
}

func main() {
	// Both concrete types are usable wherever an Animal is expected.
	for _, animal := range []Animal{Cat{}, Parrot{name: ""}} {
		fmt.Println(animal.Say())
	}
}
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

// Interface as quoted from container/heap: a sort.Interface extended with
// Push and Pop.
type Interface interface {
	sort.Interface
	Push(x interface{}) // add x as element Len()
	Pop() interface{}   // remove and return element Len() - 1.
}
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

// Interface as quoted from package sort.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// Balancer distributes download jobs over a pool of workers.
type Balancer struct {
	pool     Pool            // heap of workers
	done     chan *Worker    // workers report each finished job here
	requests chan string     // incoming URLs to be dispatched
	flowctrl chan bool       // PMFC: token that admits the next URL
	queue    int             // jobs dispatched and not yet completed
	wg       *sync.WaitGroup // shared with every worker
}
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var (
	WORKERS    = 5 // number of workers started by Balancer.init
	WORKERSCAP = 5 // capacity of each worker's urls channel
)
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// Defaults; each one except WORKERSCAP can be overridden by a flag (see init).
var (
	WORKERS     = 5     // number of workers
	WORKERSCAP  = 5     // size of each worker's queue
	ATATASTREAM = 291   // id of the stream to grab
	ATATAPOS    = 0     // start position in the stream
	IMGDIR      = "img" // directory for images
)

// init registers the command-line flags; values are filled in by flag.Parse.
// The usage texts are spelled out so that -h output is actually helpful.
func init() {
	flag.IntVar(&WORKERS, "w", WORKERS, "number of workers")
	flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id of the stream")
	flag.IntVar(&ATATAPOS, "p", ATATAPOS, "start position")
	flag.StringVar(&IMGDIR, "d", IMGDIR, "directory for images")
}
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastebin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go
package main

import (
	"fmt"
	// third-party package used to fetch and query the HTML pages
	"github.com/opesun/goquery"
	"strconv"
)

const (
	// ENDMESSAGE is what generator sends on its output channel after the
	// last image URL.
	ENDMESSAGE = "TooLateToDieYoung"
)

// generator walks the pages of the given stream, beginning at position start
// and advancing by 20 per page, and sends the URL of every image found to out.
// When a page contains an element matching "li.last.hide" it sends ENDMESSAGE
// and returns.
func generator(out chan string, stream, start int) {
	for pos := start; ; pos += 20 {
		// Fetch and parse one page of the stream.
		x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos))
		// NOTE(review): a page that fails to load is skipped without a
		// retry or a log message, and the loop has no other exit.
		if err == nil {
			// Every image link on the page becomes one download job.
			for _, url := range x.Find("figure a.image").Attrs("href") {
				out <- "http://atata.com/" + url
			}
			// Presumably this element marks the last page — confirm.
			if len(x.Find("li.last.hide")) > 0 {
				out <- ENDMESSAGE
				return
			}
		}
	}
}



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// Worker is the state of one downloader as tracked by the balancer.
type Worker struct {
	urls    chan string     // queue of URLs for this worker to download
	pending int             // jobs handed to the worker and not yet reported done
	index   int             // position in the Pool heap
	wg      *sync.WaitGroup // shared group used to wait for downloads in progress
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// work is the worker's endless main loop: it takes the next URL from w.urls,
// downloads it and reports itself on done. The shared WaitGroup is
// incremented only for the duration of the download, so waiting on it covers
// downloads in progress, not URLs still sitting in the queue.
func (w *Worker) work(done chan *Worker) {
	for {
		url := <-w.urls // blocks until a job is available
		w.wg.Add(1)     // mark a download as in progress
		download(url)
		w.wg.Done()
		done <- w // tell the receiver that one job is finished
	}
}
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main

import (
	"fmt"
)

// Animal is anything that can say something.
type Animal interface {
	Say() string
}

// Cat has no state of its own.
type Cat struct{}

// Say makes Cat satisfy Animal.
func (c Cat) Say() string {
	return "!"
}

// Parrot carries a name that it repeats when speaking.
type Parrot struct {
	name string
}

// Say makes Parrot satisfy Animal.
func (p Parrot) Say() string {
	return p.name + " !"
}

func main() {
	// Both concrete types are usable wherever an Animal is expected.
	for _, animal := range []Animal{Cat{}, Parrot{name: ""}} {
		fmt.Println(animal.Say())
	}
}
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

// Interface as quoted from container/heap: a sort.Interface extended with
// Push and Pop.
type Interface interface {
	sort.Interface
	Push(x interface{}) // add x as element Len()
	Pop() interface{}   // remove and return element Len() - 1.
}
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

// Interface as quoted from package sort.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// Balancer distributes download jobs over a pool of workers.
type Balancer struct {
	pool     Pool            // heap of workers
	done     chan *Worker    // workers report each finished job here
	requests chan string     // incoming URLs to be dispatched
	flowctrl chan bool       // PMFC: token that admits the next URL
	queue    int             // jobs dispatched and not yet completed
	wg       *sync.WaitGroup // shared with every worker
}
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var (
	WORKERS    = 5 // number of workers started by Balancer.init
	WORKERSCAP = 5 // capacity of each worker's urls channel
)
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// Defaults; each one except WORKERSCAP can be overridden by a flag (see init).
var (
	WORKERS     = 5     // number of workers
	WORKERSCAP  = 5     // size of each worker's queue
	ATATASTREAM = 291   // id of the stream to grab
	ATATAPOS    = 0     // start position in the stream
	IMGDIR      = "img" // directory for images
)

// init registers the command-line flags; values are filled in by flag.Parse.
// The usage texts are spelled out so that -h output is actually helpful.
func init() {
	flag.IntVar(&WORKERS, "w", WORKERS, "number of workers")
	flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id of the stream")
	flag.IntVar(&ATATAPOS, "p", ATATAPOS, "start position")
	flag.StringVar(&IMGDIR, "d", IMGDIR, "directory for images")
}
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastebin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
 ),      .                 .  ,  ,         ,        . 

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go
package main

import (
	"fmt"
	// third-party package used to fetch and query the HTML pages
	"github.com/opesun/goquery"
	"strconv"
)

const (
	// ENDMESSAGE is what generator sends on its output channel after the
	// last image URL.
	ENDMESSAGE = "TooLateToDieYoung"
)

// generator walks the pages of the given stream, beginning at position start
// and advancing by 20 per page, and sends the URL of every image found to out.
// When a page contains an element matching "li.last.hide" it sends ENDMESSAGE
// and returns.
func generator(out chan string, stream, start int) {
	for pos := start; ; pos += 20 {
		// Fetch and parse one page of the stream.
		x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos))
		// NOTE(review): a page that fails to load is skipped without a
		// retry or a log message, and the loop has no other exit.
		if err == nil {
			// Every image link on the page becomes one download job.
			for _, url := range x.Find("figure a.image").Attrs("href") {
				out <- "http://atata.com/" + url
			}
			// Presumably this element marks the last page — confirm.
			if len(x.Find("li.last.hide")) > 0 {
				out <- ENDMESSAGE
				return
			}
		}
	}
}



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// Worker is the state of one downloader as tracked by the balancer.
type Worker struct {
	urls    chan string     // queue of URLs for this worker to download
	pending int             // jobs handed to the worker and not yet reported done
	index   int             // position in the Pool heap
	wg      *sync.WaitGroup // shared group used to wait for downloads in progress
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// work is the worker's endless main loop: it takes the next URL from w.urls,
// downloads it and reports itself on done. The shared WaitGroup is
// incremented only for the duration of the download, so waiting on it covers
// downloads in progress, not URLs still sitting in the queue.
func (w *Worker) work(done chan *Worker) {
	for {
		url := <-w.urls // blocks until a job is available
		w.wg.Add(1)     // mark a download as in progress
		download(url)
		w.wg.Done()
		done <- w // tell the receiver that one job is finished
	}
}
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main

import (
	"fmt"
)

// Animal is anything that can say something.
type Animal interface {
	Say() string
}

// Cat has no state of its own.
type Cat struct{}

// Say makes Cat satisfy Animal.
func (c Cat) Say() string {
	return "!"
}

// Parrot carries a name that it repeats when speaking.
type Parrot struct {
	name string
}

// Say makes Parrot satisfy Animal.
func (p Parrot) Say() string {
	return p.name + " !"
}

func main() {
	// Both concrete types are usable wherever an Animal is expected.
	for _, animal := range []Animal{Cat{}, Parrot{name: ""}} {
		fmt.Println(animal.Say())
	}
}
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

// Interface as quoted from container/heap: a sort.Interface extended with
// Push and Pop.
type Interface interface {
	sort.Interface
	Push(x interface{}) // add x as element Len()
	Pop() interface{}   // remove and return element Len() - 1.
}
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

// Interface as quoted from package sort.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// Balancer distributes download jobs over a pool of workers.
type Balancer struct {
	pool     Pool            // heap of workers
	done     chan *Worker    // workers report each finished job here
	requests chan string     // incoming URLs to be dispatched
	flowctrl chan bool       // PMFC: token that admits the next URL
	queue    int             // jobs dispatched and not yet completed
	wg       *sync.WaitGroup // shared with every worker
}
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var (
	WORKERS    = 5 // number of workers started by Balancer.init
	WORKERSCAP = 5 // capacity of each worker's urls channel
)
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// Defaults; each one except WORKERSCAP can be overridden by a flag (see init).
var (
	WORKERS     = 5     // number of workers
	WORKERSCAP  = 5     // size of each worker's queue
	ATATASTREAM = 291   // id of the stream to grab
	ATATAPOS    = 0     // start position in the stream
	IMGDIR      = "img" // directory for images
)

// init registers the command-line flags; values are filled in by flag.Parse.
// The usage texts are spelled out so that -h output is actually helpful.
func init() {
	flag.IntVar(&WORKERS, "w", WORKERS, "number of workers")
	flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id of the stream")
	flag.IntVar(&ATATAPOS, "p", ATATAPOS, "start position")
	flag.StringVar(&IMGDIR, "d", IMGDIR, "directory for images")
}
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastebin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go
package main

import (
	"fmt"
	// third-party package used to fetch and query the HTML pages
	"github.com/opesun/goquery"
	"strconv"
)

const (
	// ENDMESSAGE is what generator sends on its output channel after the
	// last image URL.
	ENDMESSAGE = "TooLateToDieYoung"
)

// generator walks the pages of the given stream, beginning at position start
// and advancing by 20 per page, and sends the URL of every image found to out.
// When a page contains an element matching "li.last.hide" it sends ENDMESSAGE
// and returns.
func generator(out chan string, stream, start int) {
	for pos := start; ; pos += 20 {
		// Fetch and parse one page of the stream.
		x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos))
		// NOTE(review): a page that fails to load is skipped without a
		// retry or a log message, and the loop has no other exit.
		if err == nil {
			// Every image link on the page becomes one download job.
			for _, url := range x.Find("figure a.image").Attrs("href") {
				out <- "http://atata.com/" + url
			}
			// Presumably this element marks the last page — confirm.
			if len(x.Find("li.last.hide")) > 0 {
				out <- ENDMESSAGE
				return
			}
		}
	}
}



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// Worker is the state of one downloader as tracked by the balancer.
type Worker struct {
	urls    chan string     // queue of URLs for this worker to download
	pending int             // jobs handed to the worker and not yet reported done
	index   int             // position in the Pool heap
	wg      *sync.WaitGroup // shared group used to wait for downloads in progress
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// work is the worker's endless main loop: it takes the next URL from w.urls,
// downloads it and reports itself on done. The shared WaitGroup is
// incremented only for the duration of the download, so waiting on it covers
// downloads in progress, not URLs still sitting in the queue.
func (w *Worker) work(done chan *Worker) {
	for {
		url := <-w.urls // blocks until a job is available
		w.wg.Add(1)     // mark a download as in progress
		download(url)
		w.wg.Done()
		done <- w // tell the receiver that one job is finished
	}
}
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main

import (
	"fmt"
)

// Animal is anything that can say something.
type Animal interface {
	Say() string
}

// Cat has no state of its own.
type Cat struct{}

// Say makes Cat satisfy Animal.
func (c Cat) Say() string {
	return "!"
}

// Parrot carries a name that it repeats when speaking.
type Parrot struct {
	name string
}

// Say makes Parrot satisfy Animal.
func (p Parrot) Say() string {
	return p.name + " !"
}

func main() {
	// Both concrete types are usable wherever an Animal is expected.
	for _, animal := range []Animal{Cat{}, Parrot{name: ""}} {
		fmt.Println(animal.Say())
	}
}
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

// Interface as quoted from container/heap: a sort.Interface extended with
// Push and Pop.
type Interface interface {
	sort.Interface
	Push(x interface{}) // add x as element Len()
	Pop() interface{}   // remove and return element Len() - 1.
}
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

// Interface as quoted from package sort.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// Balancer distributes download jobs over a pool of workers.
type Balancer struct {
	pool     Pool            // heap of workers
	done     chan *Worker    // workers report each finished job here
	requests chan string     // incoming URLs to be dispatched
	flowctrl chan bool       // PMFC: token that admits the next URL
	queue    int             // jobs dispatched and not yet completed
	wg       *sync.WaitGroup // shared with every worker
}
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var (
	WORKERS    = 5 // number of workers started by Balancer.init
	WORKERSCAP = 5 // capacity of each worker's urls channel
)
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// Defaults; each one except WORKERSCAP can be overridden by a flag (see init).
var (
	WORKERS     = 5     // number of workers
	WORKERSCAP  = 5     // size of each worker's queue
	ATATASTREAM = 291   // id of the stream to grab
	ATATAPOS    = 0     // start position in the stream
	IMGDIR      = "img" // directory for images
)

// init registers the command-line flags; values are filled in by flag.Parse.
// The usage texts are spelled out so that -h output is actually helpful.
func init() {
	flag.IntVar(&WORKERS, "w", WORKERS, "number of workers")
	flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id of the stream")
	flag.IntVar(&ATATAPOS, "p", ATATAPOS, "start position")
	flag.StringVar(&IMGDIR, "d", IMGDIR, "directory for images")
}
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastebin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
 ),      .                 .  ,  ,         ,        . 

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go
package main

import (
	"fmt"
	// third-party package used to fetch and query the HTML pages
	"github.com/opesun/goquery"
	"strconv"
)

const (
	// ENDMESSAGE is what generator sends on its output channel after the
	// last image URL.
	ENDMESSAGE = "TooLateToDieYoung"
)

// generator walks the pages of the given stream, beginning at position start
// and advancing by 20 per page, and sends the URL of every image found to out.
// When a page contains an element matching "li.last.hide" it sends ENDMESSAGE
// and returns.
func generator(out chan string, stream, start int) {
	for pos := start; ; pos += 20 {
		// Fetch and parse one page of the stream.
		x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos))
		// NOTE(review): a page that fails to load is skipped without a
		// retry or a log message, and the loop has no other exit.
		if err == nil {
			// Every image link on the page becomes one download job.
			for _, url := range x.Find("figure a.image").Attrs("href") {
				out <- "http://atata.com/" + url
			}
			// Presumably this element marks the last page — confirm.
			if len(x.Find("li.last.hide")) > 0 {
				out <- ENDMESSAGE
				return
			}
		}
	}
}



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// Worker is the state of one downloader as tracked by the balancer.
type Worker struct {
	urls    chan string     // queue of URLs for this worker to download
	pending int             // jobs handed to the worker and not yet reported done
	index   int             // position in the Pool heap
	wg      *sync.WaitGroup // shared group used to wait for downloads in progress
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// work is the worker's endless main loop: it takes the next URL from w.urls,
// downloads it and reports itself on done. The shared WaitGroup is
// incremented only for the duration of the download, so waiting on it covers
// downloads in progress, not URLs still sitting in the queue.
func (w *Worker) work(done chan *Worker) {
	for {
		url := <-w.urls // blocks until a job is available
		w.wg.Add(1)     // mark a download as in progress
		download(url)
		w.wg.Done()
		done <- w // tell the receiver that one job is finished
	}
}
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main

import (
	"fmt"
)

// Animal is anything that can say something.
type Animal interface {
	Say() string
}

// Cat has no state of its own.
type Cat struct{}

// Say makes Cat satisfy Animal.
func (c Cat) Say() string {
	return "!"
}

// Parrot carries a name that it repeats when speaking.
type Parrot struct {
	name string
}

// Say makes Parrot satisfy Animal.
func (p Parrot) Say() string {
	return p.name + " !"
}

func main() {
	// Both concrete types are usable wherever an Animal is expected.
	for _, animal := range []Animal{Cat{}, Parrot{name: ""}} {
		fmt.Println(animal.Say())
	}
}
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

// Interface as quoted from container/heap: a sort.Interface extended with
// Push and Pop.
type Interface interface {
	sort.Interface
	Push(x interface{}) // add x as element Len()
	Pop() interface{}   // remove and return element Len() - 1.
}
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

// Interface as quoted from package sort.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// Balancer distributes download jobs over a pool of workers.
type Balancer struct {
	pool     Pool            // heap of workers
	done     chan *Worker    // workers report each finished job here
	requests chan string     // incoming URLs to be dispatched
	flowctrl chan bool       // PMFC: token that admits the next URL
	queue    int             // jobs dispatched and not yet completed
	wg       *sync.WaitGroup // shared with every worker
}
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var (
	WORKERS    = 5 // number of workers started by Balancer.init
	WORKERSCAP = 5 // capacity of each worker's urls channel
)
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// Defaults; each one except WORKERSCAP can be overridden by a flag (see init).
var (
	WORKERS     = 5     // number of workers
	WORKERSCAP  = 5     // size of each worker's queue
	ATATASTREAM = 291   // id of the stream to grab
	ATATAPOS    = 0     // start position in the stream
	IMGDIR      = "img" // directory for images
)

// init registers the command-line flags; values are filled in by flag.Parse.
// The usage texts are spelled out so that -h output is actually helpful.
func init() {
	flag.IntVar(&WORKERS, "w", WORKERS, "number of workers")
	flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id of the stream")
	flag.IntVar(&ATATAPOS, "p", ATATAPOS, "start position")
	flag.StringVar(&IMGDIR, "d", IMGDIR, "directory for images")
}
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
 ),      .                 .  ,  ,         ,        . 

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
 ),      .                 .  ,  ,         ,        . 

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !

), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// Worker is one downloader in the pool.
type Worker struct {
	// urls is the channel the worker receives its jobs on.
	urls chan string
	// wg is the WaitGroup the worker marks around each download.
	wg *sync.WaitGroup
	// pending is the number of jobs handed to this worker and not yet
	// reported as finished; the pool heap is ordered by it.
	pending int
	// index is the worker's current position inside the pool heap.
	index int
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main

import "fmt"

// Animal is satisfied by any type that has a Say method returning a string.
type Animal interface {
	Say() string
}

// Cat carries no data; it only has behavior.
type Cat struct{}

// Say makes Cat an Animal.
func (Cat) Say() string {
	return "!"
}

// Parrot is an Animal that knows its own name.
type Parrot struct {
	name string
}

// Say makes Parrot an Animal; the phrase starts with the parrot's name.
func (bird Parrot) Say() string {
	return bird.name + " !"
}

// main stores two unrelated concrete types in one []Animal and calls Say on
// each through the interface.
func main() {
	zoo := []Animal{Cat{}, Parrot{name: ""}}
	for i := 0; i < len(zoo); i++ {
		fmt.Println(zoo[i].Say())
	}
}
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

// Interface is the heap contract as quoted from the container/heap
// documentation: everything sort.Interface requires, plus Push and Pop.
type Interface interface {
	sort.Interface

	// add x as element Len()
	Push(x interface{})

	// remove and return element Len() - 1.
	Pop() interface{}
}
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

// Interface is the sorting contract as quoted from the sort package
// documentation.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int

	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool

	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !

), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
 ),      .                 .  ,  ,         ,        . 

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !
), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !

), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !

), . . , , , .

, , goquery (http://github.com/opesun/goquery). , :

// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }



,
, , ( ). - , , . , , , :

" , - ?"

, . PMFC (Poor Man's Flow Control " ". , ( ) ).

" , , ?"

! - " " " ". ? , :
( ) - . - ( - ?).
. "":

// Worker is one downloader goroutine as seen by the balancer.
type Worker struct {
	urls    chan string     // URLs assigned to this worker
	pending int             // jobs assigned but not yet reported as completed
	index   int             // position of the worker in the Pool heap
	wg      *sync.WaitGroup // counts downloads in progress; shared with the Balancer
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :

- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.

"" work ( , "", this. ):

// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :

import ( "sync" )
: done , work() . , ( ).
download() - , . .

Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).

: - , :

package main

import (
	"fmt"
)

// Animal is satisfied by any type that has a Say method returning a string.
type Animal interface {
	Say() string
}

// Cat is an Animal that carries no state.
type Cat struct{}

// Say returns the cat's fixed phrase.
func (c Cat) Say() string {
	return "!"
}

// Parrot is an Animal that has a name.
type Parrot struct {
	name string
}

// Say returns the parrot's name followed by its phrase.
func (p Parrot) Say() string {
	return p.name + " !"
}

// main puts a Cat and a Parrot into one []Animal and lets each of them speak,
// showing that both types satisfy the interface without declaring it.
func main() {
	kitty := Cat{}
	popka := Parrot{name: ""}
	animals := []Animal{kitty, popka}
	for _, animal := range animals {
		fmt.Println(animal.Say())
	}
}
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :

// Interface is the method set a type must provide to be used with the
// container/heap functions: sort.Interface plus Push and Pop.
type Interface interface {
	sort.Interface
	Push(x interface{}) // add x as element Len()
	Pop() interface{}   // remove and return element Len() - 1.
}
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :

// Interface is the method set a collection must provide to be sortable.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :

// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :

import ( "container/heap" )
. , , , :

// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :

// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .

init() WORKERS WORKERSCAP , :

// Worker pool tunables.
var (
	WORKERS    = 5 // number of workers started by (*Balancer).init
	WORKERSCAP = 5 // buffer size of each worker's urls channel
)
. , , ( , . , . , , ) :

// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:

// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :

// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :

// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):

// Defaults. All of them except WORKERSCAP can be overridden from the command
// line through the flags registered in init.
var (
	WORKERS     = 5     // number of workers (-w)
	WORKERSCAP  = 5     // buffer size of each worker's urls channel
	ATATASTREAM = 291   // id of the stream to grab (-s)
	ATATAPOS    = 0     // start position inside the stream (-p)
	IMGDIR      = "img" // directory the images are saved to (-d)
)

// init registers the command-line flags; flag.Parse is called from main.
//
// NOTE(review): the usage strings below are blank or truncated in this copy of
// the source; they are reproduced verbatim.
func init() {
	flag.IntVar(&WORKERS, "w", WORKERS, " ")
	flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ")
	flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ")
	flag.StringVar(&IMGDIR, "d", IMGDIR, " ")
}
- . , , os/signal , ctrl-c :

import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!


func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )



.. . , - ! .

time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .

, .

pastbin.com

PS , #291 () "" 10 , ( "-p=200" ) .

UPD2: PPS "" : - Go, , . , , - + . , : , .

,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go

, !

Source: https://habr.com/ru/post/198150/


All Articles