/*
Copyright © 2021 Vincent Batts <vbatts@hashbangbash.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
|
|
package goldenaudiobooks
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
"github.com/cheggaaa/pb/v3"
|
|
log "github.com/sirupsen/logrus"
|
|
)
|
|
|
|
// Book holds what is known about a single audiobook page: where it was
// scraped from and the media it links to.
type Book struct {
	// URL is the address of the page the book was scraped from.
	URL string
	// Title is the text of the page's <title> element.
	Title string
	// Files lists the URLs of the book's audio files, in page order.
	Files []string
	// Image is the URL of the cover image (the page's og:image).
	Image string
}
|
|
|
|
func BookScrape(bookURL string) (*Book, error) {
|
|
// Request the HTML page.
|
|
res, err := http.Get(bookURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer res.Body.Close()
|
|
if res.StatusCode != 200 {
|
|
return nil, fmt.Errorf("status code error: %d %s", res.StatusCode, res.Status)
|
|
}
|
|
|
|
// Load the HTML document
|
|
doc, err := goquery.NewDocumentFromReader(res.Body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
/*
|
|
<audio class="wp-audio-shortcode" id="audio-24287-11" preload="none" style="width: 100%;" controls="controls"><source class="lazy lazy-hidden" type="audio/mpeg" src="https://ipaudio.club/wp-content/uploads/GOLN/Water%20Dancer%20(Ta%20Nehisi%20Coates)/11.mp3?_=11" /><a href="https://ipaudio.club/wp-content/uploads/GOLN/Water%20Dancer%20(Ta%20Nehisi%20Coates)/11.mp3">https://ipaudio.club/wp-content/uploads/GOLN/Water%20Dancer%20(Ta%20Nehisi%20Coates)/11.mp3</a></audio>
|
|
*/
|
|
|
|
b := Book{}
|
|
b.URL = bookURL
|
|
|
|
// Find the review items
|
|
doc.Find("source").Each(func(i int, s *goquery.Selection) {
|
|
//title := s.Find("source").Text()
|
|
if t, exists := s.Attr("type"); exists && t == "audio/mpeg" {
|
|
src, _ := s.Attr("src")
|
|
fmt.Println(i, src)
|
|
b.Files = append(b.Files, src)
|
|
}
|
|
})
|
|
doc.Find("title").Each(func(i int, s *goquery.Selection) {
|
|
b.Title = s.Text()
|
|
})
|
|
doc.Find("meta").Each(func(i int, s *goquery.Selection) {
|
|
if p, exists := s.Attr("property"); exists && p == "og:image" {
|
|
b.Image, _ = s.Attr("content")
|
|
}
|
|
})
|
|
return &b, nil
|
|
}
|
|
|
|
// BookFetcher creates a folder in dest, from the title in Book, and downloads the files there.
|
|
// If no title is present, then one is generated and set in Book b.
|
|
func BookFetcher(b *Book, dest string, createLocal bool) error {
|
|
p := dest
|
|
if !createLocal {
|
|
p = filepath.Join(dest, b.Title)
|
|
if err := os.MkdirAll(p, os.FileMode(0755)); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
log.Infof("Title: %q", b.Title)
|
|
log.Infof("Image: %q", b.Image)
|
|
|
|
err := func() error {
|
|
fd, err := os.OpenFile(filepath.Join(p, "cover.jpg"), os.O_RDWR|os.O_CREATE, os.FileMode(0644))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
resp, err := http.Get(b.Image)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
//log.Infof("%#v", resp)
|
|
if resp.StatusCode != http.StatusOK {
|
|
return fmt.Errorf("status: %d; url: %q", resp.StatusCode, b.Image)
|
|
}
|
|
size, err := strconv.ParseInt(resp.Header.Get("content-length"), 10, 64)
|
|
if err != nil {
|
|
size = -1
|
|
}
|
|
bar := pb.Full.Start64(size)
|
|
|
|
barReader := bar.NewProxyReader(resp.Body)
|
|
i, err := io.Copy(fd, barReader)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
bar.Finish()
|
|
fd.Close()
|
|
log.Infof("wrote 'cover.jpg' (%d)", i)
|
|
|
|
return nil
|
|
}()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, f := range b.Files {
|
|
u, err := url.Parse(f)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
fname := filepath.Base(u.Path)
|
|
fd, err := os.OpenFile(filepath.Join(p, fname), os.O_RDWR|os.O_CREATE, os.FileMode(0644))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
log.Infof("Fetching %q", f)
|
|
resp, err := http.Get(f)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
//log.Infof("%#v", resp)
|
|
if resp.StatusCode != http.StatusOK {
|
|
return fmt.Errorf("status: %d; url: %q", resp.StatusCode, f)
|
|
}
|
|
size, err := strconv.ParseInt(resp.Header.Get("content-length"), 10, 64)
|
|
if err != nil {
|
|
size = -1
|
|
}
|
|
bar := pb.Full.Start64(size)
|
|
|
|
barReader := bar.NewProxyReader(resp.Body)
|
|
i, err := io.Copy(fd, barReader)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
bar.Finish()
|
|
fd.Close()
|
|
log.Infof("wrote %q (%d)", fname, i)
|
|
}
|
|
return nil
|
|
}
|