initial commit: collect the book info
Signed-off-by: Vincent Batts <vbatts@hashbangbash.com>
This commit is contained in:
parent
21764d4db4
commit
8fcf30be13
3 changed files with 76 additions and 0 deletions
5
go.mod
Normal file
5
go.mod
Normal file
|
@ -0,0 +1,5 @@
|
|||
module git.thisco.de/vbatts/book-fetch
|
||||
|
||||
go 1.16
|
||||
|
||||
require github.com/PuerkitoBio/goquery v1.6.1
|
10
go.sum
Normal file
10
go.sum
Normal file
|
@ -0,0 +1,10 @@
|
|||
github.com/PuerkitoBio/goquery v1.6.1 h1:FgjbQZKl5HTmcn4sKBgvx8vv63nhyhIpv7lJpFGCWpk=
|
||||
github.com/PuerkitoBio/goquery v1.6.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
|
||||
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
|
||||
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
|
||||
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
61
main.go
Normal file
61
main.go
Normal file
|
@ -0,0 +1,61 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// Book describes one audiobook page and the audio files referenced on it.
type Book struct {
	// URL is the address of the page the book was read from, and Title is the
	// text taken from that page's <title> element.
	URL, Title string
	// Files holds the audio source URLs collected from the page.
	Files []string
}
|
||||
|
||||
func BookScrape(bookURL string) (*Book, error) {
|
||||
// Request the HTML page.
|
||||
res, err := http.Get(bookURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != 200 {
|
||||
return nil, fmt.Errorf("status code error: %d %s", res.StatusCode, res.Status)
|
||||
}
|
||||
|
||||
// Load the HTML document
|
||||
doc, err := goquery.NewDocumentFromReader(res.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
/*
|
||||
<audio class="wp-audio-shortcode" id="audio-24287-11" preload="none" style="width: 100%;" controls="controls"><source class="lazy lazy-hidden" type="audio/mpeg" src="https://ipaudio.club/wp-content/uploads/GOLN/Water%20Dancer%20(Ta%20Nehisi%20Coates)/11.mp3?_=11" /><a href="https://ipaudio.club/wp-content/uploads/GOLN/Water%20Dancer%20(Ta%20Nehisi%20Coates)/11.mp3">https://ipaudio.club/wp-content/uploads/GOLN/Water%20Dancer%20(Ta%20Nehisi%20Coates)/11.mp3</a></audio>
|
||||
*/
|
||||
|
||||
b := Book{}
|
||||
b.URL = bookURL
|
||||
|
||||
// Find the review items
|
||||
doc.Find(".lazy-hidden").Each(func(i int, s *goquery.Selection) {
|
||||
//title := s.Find("source").Text()
|
||||
if src, exists := s.Attr("src"); exists && strings.Contains(src, ".mp3") {
|
||||
fmt.Println(i, src)
|
||||
b.Files = append(b.Files, src)
|
||||
}
|
||||
})
|
||||
doc.Find("title").Each(func(i int, s *goquery.Selection) {
|
||||
b.Title = s.Text()
|
||||
})
|
||||
return &b, nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
b, err := BookScrape("https://goldenaudiobooks.com/the-water-dancer-oprahs-book-club-audiobook/")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
fmt.Println(b)
|
||||
}
|
Loading…
Reference in a new issue