feat: add endpoint to generate RSS feed
This commit is contained in:
parent
a77c05bdee
commit
9ba90663e4
|
@ -0,0 +1,119 @@
|
|||
package internal
|
||||
|
||||
import (
	"context"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"

	"github.com/PuerkitoBio/goquery"
)
|
||||
|
||||
// lubimyCzytacBaseURL is the scheme and host, without a trailing slash, that
// author page requests and book links are prefixed with.
const lubimyCzytacBaseURL = "https://lubimyczytac.pl"
|
||||
|
||||
// ErrAuthorNotFound is returned by GetAuthor when lubimyczytac.pl answers the
// author page request with 404 Not Found.
var ErrAuthorNotFound = errors.New("author not found")

// ErrUnexpectedStatus is returned by GetAuthor when lubimyczytac.pl answers
// with a status that is neither 200 OK nor 404 Not Found.
var ErrUnexpectedStatus = errors.New("unexpected http status was returned by lubimyczytac.pl")
|
||||
|
||||
// Book is a single entry of an author's book list as scraped from the
// author's page.
type Book struct {
	// Title is the whitespace-trimmed text of the book's title element.
	Title string
	// URL is the link to the book, built from the site base URL and the
	// title element's href attribute.
	URL string
}
|
||||
|
||||
type Author struct {
|
||||
Name string
|
||||
ShortDescription string
|
||||
URL string
|
||||
Books []Book
|
||||
}
|
||||
|
||||
// LubimyCzytacClient fetches author pages from lubimyczytac.pl and turns
// them into Author values.
type LubimyCzytacClient struct {
	// http is the client every outgoing request is sent with.
	http *http.Client
}

// NewLubimyCzytacClient returns a LubimyCzytacClient that sends its requests
// through client.
//
// A nil client is replaced with http.DefaultClient; storing nil would make the
// first request panic on a nil pointer dereference inside (*http.Client).Do.
func NewLubimyCzytacClient(client *http.Client) *LubimyCzytacClient {
	if client == nil {
		client = http.DefaultClient
	}
	return &LubimyCzytacClient{http: client}
}
|
||||
|
||||
func (c *LubimyCzytacClient) GetAuthor(ctx context.Context, authorID string) (Author, error) {
|
||||
req, err := http.NewRequestWithContext(
|
||||
ctx,
|
||||
http.MethodGet,
|
||||
fmt.Sprintf("%s/autor/%s/x", lubimyCzytacBaseURL, authorID),
|
||||
nil,
|
||||
)
|
||||
if err != nil {
|
||||
return Author{}, fmt.Errorf("http.NewRequestWithContext: %w", err)
|
||||
}
|
||||
|
||||
resp, err := c.http.Do(req)
|
||||
if err != nil {
|
||||
return Author{}, fmt.Errorf("httpClient.Do: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
_ = resp.Body.Close()
|
||||
}()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
_, _ = io.Copy(io.Discard, resp.Body)
|
||||
if resp.StatusCode == http.StatusNotFound {
|
||||
return Author{}, ErrAuthorNotFound
|
||||
}
|
||||
return Author{}, ErrUnexpectedStatus
|
||||
}
|
||||
|
||||
p, err := newAuthorPageParser(resp.Body)
|
||||
if err != nil {
|
||||
_, _ = io.Copy(io.Discard, resp.Body)
|
||||
return Author{}, fmt.Errorf("newAuthorPageParser: %w", err)
|
||||
}
|
||||
|
||||
return Author{
|
||||
Name: p.name(),
|
||||
ShortDescription: p.shortDescription(),
|
||||
URL: p.url(),
|
||||
Books: p.books(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
type authorPageParser struct {
|
||||
doc *goquery.Document
|
||||
}
|
||||
|
||||
func newAuthorPageParser(r io.Reader) (authorPageParser, error) {
|
||||
doc, err := goquery.NewDocumentFromReader(r)
|
||||
if err != nil {
|
||||
return authorPageParser{}, fmt.Errorf("goquery.NewDocumentFromReader: %w", err)
|
||||
}
|
||||
return authorPageParser{
|
||||
doc: doc,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (p authorPageParser) name() string {
|
||||
return strings.TrimSpace(p.doc.Find("#author-info .title-container").Text())
|
||||
}
|
||||
|
||||
func (p authorPageParser) url() string {
|
||||
return strings.TrimSpace(p.doc.Find(`meta[property="og:url"]`).AttrOr("content", ""))
|
||||
}
|
||||
|
||||
func (p authorPageParser) shortDescription() string {
|
||||
return strings.TrimSpace(p.doc.Find(`meta[name="description"]`).AttrOr("content", ""))
|
||||
}
|
||||
|
||||
func (p authorPageParser) books() []Book {
|
||||
booksSel := p.doc.Find("#authorBooks .authorAllBooks__single")
|
||||
books := make([]Book, booksSel.Length())
|
||||
booksSel.Each(func(i int, selection *goquery.Selection) {
|
||||
books[i] = Book{
|
||||
Title: strings.TrimSpace(selection.Find(".authorAllBooks__singleTextTitle").Text()),
|
||||
URL: lubimyCzytacBaseURL + selection.Find(".authorAllBooks__singleTextTitle").AttrOr("href", ""),
|
||||
}
|
||||
})
|
||||
return books
|
||||
}
|
133
main.go
133
main.go
|
@ -3,21 +3,17 @@ package main
|
|||
import (
|
||||
"context"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/signal"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/Kichiyaki/lubimyczytacrss/internal"
|
||||
)
|
||||
|
||||
const (
|
||||
lubimyCzytacDomain = "lubimyczytac.pl"
|
||||
defaultTimeout = 5 * time.Second
|
||||
defaultLubimyCzytacClientTimeout = 5 * time.Second
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
@ -50,20 +46,6 @@ func main() {
|
|||
}
|
||||
}
|
||||
|
||||
type rssMain struct {
|
||||
XMLName xml.Name `xml:"rss"`
|
||||
Version string `xml:"version,attr"`
|
||||
Channel rssChannel `xml:"channel"`
|
||||
}
|
||||
|
||||
type rssChannel struct {
|
||||
XMLName xml.Name `xml:"channel"`
|
||||
Link string `xml:"link"`
|
||||
Description string `xml:"description"`
|
||||
Language string `xml:"language"`
|
||||
Items []rssItem `xml:"items"`
|
||||
}
|
||||
|
||||
type rssItem struct {
|
||||
XMLName xml.Name `xml:"item"`
|
||||
Title string `xml:"title"`
|
||||
|
@ -72,84 +54,67 @@ type rssItem struct {
|
|||
Description string `xml:"description"`
|
||||
}
|
||||
|
||||
func newHandler() http.Handler {
|
||||
client := &http.Client{
|
||||
Timeout: defaultTimeout,
|
||||
type rssChannel struct {
|
||||
XMLName xml.Name `xml:"channel"`
|
||||
Title string `xml:"title"`
|
||||
Link string `xml:"link"`
|
||||
Description string `xml:"description"`
|
||||
Language string `xml:"language"`
|
||||
Items []rssItem `xml:"items"`
|
||||
}
|
||||
|
||||
type rssMain struct {
|
||||
XMLName xml.Name `xml:"rss"`
|
||||
Version string `xml:"version,attr"`
|
||||
Channel rssChannel `xml:"channel"`
|
||||
}
|
||||
|
||||
func rssMainFromAuthor(author internal.Author) rssMain {
|
||||
items := make([]rssItem, len(author.Books))
|
||||
for i, b := range author.Books {
|
||||
items[i] = rssItem{
|
||||
Title: b.Title,
|
||||
Link: b.URL,
|
||||
GUID: b.URL,
|
||||
Description: "",
|
||||
}
|
||||
}
|
||||
return rssMain{
|
||||
Version: "2.0",
|
||||
Channel: rssChannel{
|
||||
Title: author.Name,
|
||||
Description: author.ShortDescription,
|
||||
Link: author.URL,
|
||||
Items: items,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func newHandler() http.Handler {
|
||||
client := internal.NewLubimyCzytacClient(&http.Client{
|
||||
Timeout: defaultLubimyCzytacClientTimeout,
|
||||
})
|
||||
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
w.WriteHeader(http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(r.Context(), http.MethodGet, r.URL.Path[1:], nil)
|
||||
if err != nil || req.URL.Host != lubimyCzytacDomain {
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
author, err := client.GetAuthor(r.Context(), r.URL.Path[1:])
|
||||
if err == internal.ErrAuthorNotFound {
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
_, _ = w.Write([]byte(`author not found`))
|
||||
return
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
_ = resp.Body.Close()
|
||||
}()
|
||||
|
||||
p, err := newAuthorPageParser(resp.Body)
|
||||
if err != nil {
|
||||
_, _ = io.Copy(io.Discard, resp.Body)
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
_, _ = w.Write([]byte(`something went wrong while getting author info: ` + err.Error()))
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "text/xml; charset=utf-8")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
if err := xml.NewEncoder(w).Encode(rssMain{
|
||||
Version: "2.0",
|
||||
Channel: rssChannel{
|
||||
Link: req.URL.String(),
|
||||
Items: p.items(req.URL),
|
||||
},
|
||||
}); err != nil {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
_ = xml.NewEncoder(w).Encode(rssMainFromAuthor(author))
|
||||
})
|
||||
}
|
||||
|
||||
type authorPageParser struct {
|
||||
doc *goquery.Document
|
||||
}
|
||||
|
||||
func newAuthorPageParser(r io.Reader) (authorPageParser, error) {
|
||||
doc, err := goquery.NewDocumentFromReader(r)
|
||||
if err != nil {
|
||||
return authorPageParser{}, fmt.Errorf("goquery.NewDocumentFromReader: %w", err)
|
||||
}
|
||||
return authorPageParser{
|
||||
doc: doc,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (p authorPageParser) items(baseURL *url.URL) []rssItem {
|
||||
booksSel := p.doc.Find("#authorBooks .authorAllBooks__single")
|
||||
items := make([]rssItem, booksSel.Length())
|
||||
booksSel.Each(func(i int, selection *goquery.Selection) {
|
||||
link := url.URL{
|
||||
Scheme: baseURL.Scheme,
|
||||
Host: baseURL.Host,
|
||||
Path: selection.Find(".authorAllBooks__singleTextTitle").AttrOr("href", ""),
|
||||
}
|
||||
linkStr := link.String()
|
||||
title := strings.TrimSpace(selection.Find(".authorAllBooks__singleTextTitle").Text())
|
||||
items[i] = rssItem{
|
||||
Title: title,
|
||||
Link: linkStr,
|
||||
GUID: linkStr,
|
||||
Description: "",
|
||||
}
|
||||
})
|
||||
return items
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue