feat: add endpoint to generate RSS feed

This commit is contained in:
Dawid Wysokiński 2022-05-26 07:48:13 +02:00
parent a6425232ec
commit a77c05bdee
Signed by: Kichiyaki
GPG Key ID: 1ECC5DE481BE5184
3 changed files with 134 additions and 1 deletions

7
go.mod
View File

@ -1,3 +1,10 @@
module github.com/Kichiyaki/lubimyczytacrss
go 1.18
require github.com/PuerkitoBio/goquery v1.8.0
require (
github.com/andybalholm/cascadia v1.3.1 // indirect
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 // indirect
)

11
go.sum Normal file
View File

@ -0,0 +1,11 @@
github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

117
main.go
View File

@ -2,17 +2,28 @@ package main
import (
"context"
"encoding/xml"
"fmt"
"github.com/PuerkitoBio/goquery"
"io"
"log"
"net/http"
"net/url"
"os"
"os/signal"
"strings"
"time"
)
// Settings shared by the HTTP handler defined in this file.
const (
// lubimyCzytacDomain is the only host the handler accepts as a fetch target;
// requests pointing at any other host are rejected.
lubimyCzytacDomain = "lubimyczytac.pl"
// defaultTimeout is the timeout configured on the outgoing HTTP client.
defaultTimeout = 5 * time.Second
)
func main() {
httpSrv := &http.Server{
Addr: ":9234",
Handler: nil,
Handler: newHandler(),
ReadTimeout: 2 * time.Second,
ReadHeaderTimeout: 2 * time.Second,
WriteTimeout: 2 * time.Second,
@ -38,3 +49,107 @@ func main() {
log.Fatalln("httpSrv.Shutdown:", err)
}
}
// rssMain is the root <rss> element of the feed document produced by the
// handler.
type rssMain struct {
XMLName xml.Name `xml:"rss"`
// Version is written as the "version" attribute of <rss>.
Version string `xml:"version,attr"`
// Channel is the <channel> child element carrying the feed contents.
Channel rssChannel `xml:"channel"`
}
// rssChannel is the <channel> element of the feed: feed-level metadata
// followed by the feed entries.
type rssChannel struct {
	XMLName     xml.Name `xml:"channel"`
	Link        string   `xml:"link"`
	Description string   `xml:"description"`
	Language    string   `xml:"language"`
	// Items are the feed entries, one <item> element each. The tag matches
	// rssItem.XMLName on purpose: when marshaling, encoding/xml takes the
	// element name from the element type's XMLName and ignores this tag, so
	// any other name here would have no effect on output and would only stop
	// the entries from being decoded.
	Items []rssItem `xml:"item"`
}
// rssItem is a single <item> entry of the feed.
type rssItem struct {
XMLName xml.Name `xml:"item"`
// Title is written as the <title> child element.
Title string `xml:"title"`
// Link is written as the <link> child element.
Link string `xml:"link"`
// GUID is written as the <guid> child element.
GUID string `xml:"guid"`
// Description is written as the <description> child element.
Description string `xml:"description"`
}
// newHandler returns the HTTP handler that fetches a lubimyczytac.pl author
// page and serves its books as an RSS 2.0 document.
//
// The page to fetch is the request path with the leading slash removed, e.g.
// GET /https://lubimyczytac.pl/autor/... . The handler answers with:
//   - 405 for any method other than GET,
//   - 400 when the path is not an http(s) URL whose host is
//     lubimyCzytacDomain, or when the fetched page cannot be parsed,
//   - 500 when the upstream request fails or the feed cannot be encoded,
//   - 502 when the upstream responds with a status other than 200,
//   - 200 with a "text/xml; charset=utf-8" body otherwise.
func newHandler() http.Handler {
	// A single client is created once and shared by every request.
	client := &http.Client{
		Timeout: defaultTimeout,
	}
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodGet {
			w.WriteHeader(http.StatusMethodNotAllowed)
			return
		}

		// TrimPrefix rather than Path[1:]: the path may be empty (an
		// absolute-form request target without a path), and slicing an empty
		// string from index 1 panics.
		target := strings.TrimPrefix(r.URL.Path, "/")
		req, err := http.NewRequestWithContext(r.Context(), http.MethodGet, target, nil)
		if err != nil || req.URL.Host != lubimyCzytacDomain {
			w.WriteHeader(http.StatusBadRequest)
			return
		}
		// Anything but http/https is a malformed request from the caller, not
		// a server-side failure, so reject it before it reaches the client.
		if req.URL.Scheme != "http" && req.URL.Scheme != "https" {
			w.WriteHeader(http.StatusBadRequest)
			return
		}

		resp, err := client.Do(req)
		if err != nil {
			w.WriteHeader(http.StatusInternalServerError)
			return
		}
		defer func() {
			// Drain what is left of the body before closing so the transport
			// can reuse the connection; failures here are not actionable.
			_, _ = io.Copy(io.Discard, resp.Body)
			_ = resp.Body.Close()
		}()

		// An error page must not be converted into an empty but "successful"
		// feed.
		if resp.StatusCode != http.StatusOK {
			w.WriteHeader(http.StatusBadGateway)
			return
		}

		p, err := newAuthorPageParser(resp.Body)
		if err != nil {
			w.WriteHeader(http.StatusBadRequest)
			return
		}

		// Encode into memory first: once the 200 header has been written the
		// status can no longer be changed, so an encoding failure has to be
		// detected before anything is sent.
		body, err := xml.Marshal(rssMain{
			Version: "2.0",
			Channel: rssChannel{
				Link:  req.URL.String(),
				Items: p.items(req.URL),
			},
		})
		if err != nil {
			w.WriteHeader(http.StatusInternalServerError)
			return
		}

		w.Header().Set("Content-Type", "text/xml; charset=utf-8")
		w.WriteHeader(http.StatusOK)
		// A failed write means the client is gone; there is nobody left to
		// report the error to.
		_, _ = w.Write(body)
	})
}
// authorPageParser extracts feed entries from a parsed author page.
type authorPageParser struct {
// doc is the parsed HTML document that the items method queries.
doc *goquery.Document
}
// newAuthorPageParser reads an HTML document from r and wraps it in an
// authorPageParser.
//
// When the document cannot be built, the zero authorPageParser is returned
// together with the underlying error wrapped with the name of the failing
// call.
func newAuthorPageParser(r io.Reader) (authorPageParser, error) {
	var parser authorPageParser

	document, err := goquery.NewDocumentFromReader(r)
	if err != nil {
		return parser, fmt.Errorf("goquery.NewDocumentFromReader: %w", err)
	}

	parser.doc = document
	return parser, nil
}
// items builds one rssItem for every book listed on the author page.
//
// A book is any element matching "#authorBooks .authorAllBooks__single". Its
// title is the trimmed text of the ".authorAllBooks__singleTextTitle" element
// inside it, and its link (also used as the GUID) is that element's href
// resolved against baseURL, the URL the page was fetched from. The returned
// slice has the same order as the books in the document.
func (p authorPageParser) items(baseURL *url.URL) []rssItem {
	booksSel := p.doc.Find("#authorBooks .authorAllBooks__single")
	items := make([]rssItem, booksSel.Length())
	booksSel.Each(func(i int, selection *goquery.Selection) {
		// Look the title element up once; both the link and the title come
		// from it.
		titleSel := selection.Find(".authorAllBooks__singleTextTitle")
		link := resolveBookLink(baseURL, titleSel.AttrOr("href", ""))
		items[i] = rssItem{
			Title:       strings.TrimSpace(titleSel.Text()),
			Link:        link,
			GUID:        link,
			Description: "",
		}
	})
	return items
}

// resolveBookLink turns the href of a book anchor into an absolute URL string.
//
// The href is parsed as a URL reference and resolved against baseURL, so a
// query string or fragment survives intact and an already absolute href is
// kept as is. An empty href resolves to baseURL itself. If the href cannot be
// parsed at all, it is used verbatim as the path under baseURL's scheme and
// host.
func resolveBookLink(baseURL *url.URL, href string) string {
	ref, err := url.Parse(href)
	if err != nil {
		fallback := url.URL{
			Scheme: baseURL.Scheme,
			Host:   baseURL.Host,
			Path:   href,
		}
		return fallback.String()
	}
	return baseURL.ResolveReference(ref).String()
}