initial commit: collect the book info

Signed-off-by: Vincent Batts <vbatts@hashbangbash.com>
This commit is contained in:
Vincent Batts 2021-05-17 07:42:39 -05:00
parent 21764d4db4
commit 8fcf30be13
No known key found for this signature in database
GPG key ID: 524F155275DF0C3E
3 changed files with 76 additions and 0 deletions

5
go.mod Normal file
View file

@@ -0,0 +1,5 @@
module git.thisco.de/vbatts/book-fetch
go 1.16
require github.com/PuerkitoBio/goquery v1.6.1

10
go.sum Normal file
View file

@@ -0,0 +1,10 @@
github.com/PuerkitoBio/goquery v1.6.1 h1:FgjbQZKl5HTmcn4sKBgvx8vv63nhyhIpv7lJpFGCWpk=
github.com/PuerkitoBio/goquery v1.6.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

61
main.go Normal file
View file

@@ -0,0 +1,61 @@
package main
import (
	"fmt"
	"log"
	"net/http"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)
// Book holds the information collected from a single audiobook page.
type Book struct {
	// URL is the address of the page the book was read from, and Title is
	// the text of that page's <title> element.
	URL, Title string
	// Files holds the src attribute of every matching audio source on the
	// page whose value contains ".mp3", in document order.
	Files []string
}
// BookScrape fetches the audiobook page at bookURL and collects the page
// title together with the URLs of its MP3 sources.
//
// It returns an error when the request fails, when the server answers with
// any status other than 200 OK, or when the response body cannot be loaded
// as an HTML document. The returned *Book is nil whenever the error is
// non-nil.
func BookScrape(bookURL string) (*Book, error) {
	// http.Get goes through the default client, which has no timeout and can
	// block forever on an unresponsive server. The client timeout also covers
	// reading the response body.
	const fetchTimeout = 30 * time.Second
	client := &http.Client{Timeout: fetchTimeout}

	// Request the HTML page. The error returned by Get already names the URL.
	res, err := client.Get(bookURL)
	if err != nil {
		return nil, fmt.Errorf("fetching book page: %w", err)
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		// res.Status already carries the numeric code (e.g. "404 Not Found"),
		// so the code is not formatted a second time.
		return nil, fmt.Errorf("fetching %q: unexpected status %s", bookURL, res.Status)
	}

	// Load the HTML document.
	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return nil, fmt.Errorf("parsing %q: %w", bookURL, err)
	}

	b := Book{URL: bookURL}

	// The audio files are exposed through markup of this shape:
	/*
		<audio class="wp-audio-shortcode" id="audio-24287-11" preload="none" style="width: 100%;" controls="controls"><source class="lazy lazy-hidden" type="audio/mpeg" src="https://ipaudio.club/wp-content/uploads/GOLN/Water%20Dancer%20(Ta%20Nehisi%20Coates)/11.mp3?_=11" /><a href="https://ipaudio.club/wp-content/uploads/GOLN/Water%20Dancer%20(Ta%20Nehisi%20Coates)/11.mp3">https://ipaudio.club/wp-content/uploads/GOLN/Water%20Dancer%20(Ta%20Nehisi%20Coates)/11.mp3</a></audio>
	*/
	// Collect the src of every ".lazy-hidden" element that points at an MP3.
	doc.Find(".lazy-hidden").Each(func(i int, s *goquery.Selection) {
		src, ok := s.Attr("src")
		if !ok || !strings.Contains(src, ".mp3") {
			return
		}
		// Echo each match (selection index and URL) as it is found.
		fmt.Println(i, src)
		b.Files = append(b.Files, src)
	})

	// Take the first <title> in document order. Iterating over every match
	// would let a later <title> element (for example one nested in inline
	// SVG) overwrite the page title.
	b.Title = strings.TrimSpace(doc.Find("title").First().Text())

	return &b, nil
}
// main scrapes one hard-coded audiobook page and prints the resulting Book.
// Any scrape error is logged and terminates the program via log.Fatal.
func main() {
	const pageURL = "https://goldenaudiobooks.com/the-water-dancer-oprahs-book-club-audiobook/"

	book, err := BookScrape(pageURL)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println(book)
}