feat: add endpoint to generate RSS feed (author) (#2)

This commit is contained in:
Dawid Wysokiński 2022-05-29 13:01:07 +02:00 committed by GitHub
parent a6425232ec
commit 97047eec2b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 431 additions and 2 deletions

View File

@ -32,6 +32,9 @@ linters:
linters-settings:
lll:
line-length: 150
misspell:
ignore-words:
- autor
issues:
exclude-rules:

14
go.mod
View File

@ -1,3 +1,17 @@
module github.com/Kichiyaki/lubimyczytacrss
go 1.18
require (
github.com/PuerkitoBio/goquery v1.8.0
github.com/go-chi/chi/v5 v5.0.7
github.com/stretchr/testify v1.7.1
)
require (
github.com/andybalholm/cascadia v1.3.1 // indirect
github.com/davecgh/go-spew v1.1.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 // indirect
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect
)

24
go.sum Normal file
View File

@ -0,0 +1,24 @@
github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-chi/chi/v5 v5.0.7 h1:rDTPXLDHGATaeHvVlLcR4Qe0zftYethFucbjVQ1PxU8=
github.com/go-chi/chi/v5 v5.0.7/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@ -0,0 +1,146 @@
package lubimyczytac
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"github.com/PuerkitoBio/goquery"
)
const (
// defaultBaseURL is the origin a Client sends requests to unless WithBaseURL overrides it.
defaultBaseURL = "https://lubimyczytac.pl"
)
// Sentinel errors returned by Client.GetAuthor.
var (
// ErrAuthorNotFound is returned when the author page responds with 404 Not Found.
ErrAuthorNotFound = errors.New("author not found")
// ErrUnexpectedStatus is returned for any other response status that is not 200 OK.
ErrUnexpectedStatus = errors.New("unexpected http status was returned by lubimyczytac.pl")
)
// Book is a single book entry scraped from an author page.
type Book struct {
// Title is the whitespace-trimmed text of the book's title link.
Title string
// URL combines the scheme and host of the page's <base> element with the title link's href.
URL string
}
// Author is the data Client.GetAuthor extracts from an author page.
type Author struct {
// ID is the identifier that was passed to GetAuthor.
ID string
// Name is the whitespace-trimmed text of "#author-info .title-container".
Name string
// ShortDescription is the content of the page's meta description tag.
ShortDescription string
// URL is the content of the page's og:url meta tag.
URL string
// Books holds one entry per "#authorBooks .authorAllBooks__single" element found on the page.
Books []Book
}
// Client fetches author pages over HTTP and scrapes them. Create one with NewClient.
type Client struct {
// http performs the outgoing requests.
http *http.Client
// baseURL is prepended to every request path; NewClient initialises it to defaultBaseURL.
baseURL string
}
// ClientOption customises a Client; NewClient applies options in the order they are given.
type ClientOption func(c *Client)
// WithBaseURL returns a ClientOption that makes the Client send its requests
// to baseURL instead of the default origin. The value is stored as given.
func WithBaseURL(baseURL string) ClientOption {
	override := func(target *Client) {
		target.baseURL = baseURL
	}
	return override
}
// NewClient builds a Client that sends its requests through client and targets
// defaultBaseURL, then applies opts in the order given.
//
// A nil client falls back to http.DefaultClient instead of being stored and
// causing a nil-pointer panic on the first request. http.DefaultClient has no
// timeout, so callers relying on the fallback should bound requests through
// the context they pass to the Client's methods. Nil options are skipped.
func NewClient(client *http.Client, opts ...ClientOption) *Client {
	if client == nil {
		client = http.DefaultClient
	}

	c := &Client{http: client, baseURL: defaultBaseURL}
	for _, opt := range opts {
		if opt == nil {
			// Calling a nil func would panic; treat it as "no option".
			continue
		}
		opt(c)
	}
	return c
}
// GetAuthor downloads the page of the author identified by id and scrapes it
// into an Author.
//
// The request is a GET to "<baseURL>/autor/<id>/x" and honours ctx for
// cancellation and deadlines. id is percent-escaped so that it always forms
// exactly one path segment: characters such as '/', '?' or '#' in a
// caller-supplied id cannot alter the path or add a query to the request.
//
// It returns ErrAuthorNotFound when the server answers 404 Not Found,
// ErrUnexpectedStatus for any other status that is not 200 OK, and a wrapped
// error when the request cannot be built or sent, or when the response body
// cannot be parsed. On error the returned Author is the zero value.
func (c *Client) GetAuthor(ctx context.Context, id string) (Author, error) {
	// NOTE(review): the trailing "x" presumably stands in for the author's
	// name slug in the site's URL scheme — confirm against the live site.
	req, err := http.NewRequestWithContext(
		ctx,
		http.MethodGet,
		fmt.Sprintf("%s/autor/%s/x", c.baseURL, url.PathEscape(id)),
		nil,
	)
	if err != nil {
		return Author{}, fmt.Errorf("http.NewRequestWithContext: %w", err)
	}

	resp, err := c.http.Do(req)
	if err != nil {
		return Author{}, fmt.Errorf("httpClient.Do: %w", err)
	}
	defer func() {
		// Best effort: read whatever is left before closing so the transport
		// can reuse the connection. Failures here cannot be acted upon.
		_, _ = io.Copy(io.Discard, resp.Body)
		_ = resp.Body.Close()
	}()

	switch resp.StatusCode {
	case http.StatusOK:
		// Continue to parsing below.
	case http.StatusNotFound:
		return Author{}, ErrAuthorNotFound
	default:
		return Author{}, ErrUnexpectedStatus
	}

	p, err := newAuthorPageParser(resp.Body)
	if err != nil {
		return Author{}, fmt.Errorf("newAuthorPageParser: %w", err)
	}

	return Author{
		ID:               id,
		Name:             p.name(),
		ShortDescription: p.shortDescription(),
		URL:              p.url(),
		Books:            p.books(),
	}, nil
}
// authorPageParser extracts the fields of an Author from a parsed author page.
type authorPageParser struct {
// doc is the parsed HTML document.
doc *goquery.Document
// baseURL is parsed from the href of the document's "head base" element; it is an empty URL when the attribute is absent.
baseURL *url.URL
}
// newAuthorPageParser parses the HTML read from r and remembers the URL found
// in the href of the document's "head base" element (an empty string when the
// attribute is missing). It fails when the HTML cannot be read or when that
// href is not a valid URL.
func newAuthorPageParser(r io.Reader) (authorPageParser, error) {
	var parser authorPageParser

	document, err := goquery.NewDocumentFromReader(r)
	if err != nil {
		return authorPageParser{}, fmt.Errorf("goquery.NewDocumentFromReader: %w", err)
	}

	href := document.Find("head base").AttrOr("href", "")
	base, err := url.Parse(href)
	if err != nil {
		return authorPageParser{}, fmt.Errorf("url.Parse: %w", err)
	}

	parser.doc = document
	parser.baseURL = base
	return parser, nil
}
// name returns the whitespace-trimmed text of "#author-info .title-container".
func (p authorPageParser) name() string {
	title := p.doc.Find("#author-info .title-container")
	return strings.TrimSpace(title.Text())
}
// url returns the trimmed content of the og:url meta tag, or "" when the tag
// has no content attribute.
func (p authorPageParser) url() string {
	meta := p.doc.Find(`meta[property="og:url"]`)
	content := meta.AttrOr("content", "")
	return strings.TrimSpace(content)
}
// shortDescription returns the trimmed content of the description meta tag,
// or "" when the tag has no content attribute.
func (p authorPageParser) shortDescription() string {
	meta := p.doc.Find(`meta[name="description"]`)
	content := meta.AttrOr("content", "")
	return strings.TrimSpace(content)
}
// books returns one Book per "#authorBooks .authorAllBooks__single" element,
// in document order. The title comes from the entry's
// ".authorAllBooks__singleTextTitle" element; the URL joins the scheme and
// host of the page's base URL with that element's href used as the path.
func (p authorPageParser) books() []Book {
	entries := p.doc.Find("#authorBooks .authorAllBooks__single")
	result := make([]Book, 0, entries.Length())

	entries.Each(func(_ int, entry *goquery.Selection) {
		titleLink := entry.Find(".authorAllBooks__singleTextTitle")
		link := url.URL{
			Scheme: p.baseURL.Scheme,
			Host:   p.baseURL.Host,
			Path:   titleLink.AttrOr("href", ""),
		}
		result = append(result, Book{
			Title: strings.TrimSpace(titleLink.Text()),
			URL:   link.String(),
		})
	})

	return result
}

View File

@ -0,0 +1,48 @@
package lubimyczytac_test
import (
"context"
"fmt"
"net/http"
"net/http/httptest"
"testing"
"github.com/Kichiyaki/lubimyczytacrss/internal/lubimyczytac/testdata"
"github.com/stretchr/testify/assert"
"github.com/Kichiyaki/lubimyczytacrss/internal/lubimyczytac"
)
// TestClient_GetAuthor serves every fixture page from a local HTTP server and
// checks that GetAuthor scrapes it into the expected Author.
func TestClient_GetAuthor(t *testing.T) {
	t.Parallel()

	t.Run("OK", func(t *testing.T) {
		t.Parallel()

		for _, tc := range testdata.Authors {
			tc := tc // each parallel subtest needs its own copy of the loop variable
			t.Run("authorID="+tc.ID, func(t *testing.T) {
				t.Parallel()

				wantPath := fmt.Sprintf("/autor/%s/x", tc.ID)
				srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
					// Only the exact request the client is expected to make gets the page.
					if r.Method == http.MethodGet && r.URL.Path == wantPath {
						w.WriteHeader(http.StatusOK)
						_, _ = w.Write(tc.HTML)
						return
					}
					w.WriteHeader(http.StatusBadRequest)
				}))
				defer srv.Close()

				client := lubimyczytac.NewClient(srv.Client(), lubimyczytac.WithBaseURL(srv.URL))
				got, err := client.GetAuthor(context.Background(), tc.ID)

				assert.NoError(t, err)
				assert.Equal(t, tc.Author, got)
			})
		}
	})
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,83 @@
package testdata
import (
_ "embed"
"github.com/Kichiyaki/lubimyczytacrss/internal/lubimyczytac"
)
// remigiuszMrozHTML holds the contents of remigiusz_mroz.html, embedded at build time.
//go:embed remigiusz_mroz.html
var remigiuszMrozHTML []byte
// johnFlanaganHTML holds the contents of john_flanagan.html, embedded at build time.
//go:embed john_flanagan.html
var johnFlanaganHTML []byte
// AuthorWithHTML pairs an expected Author with the raw page it is supposed to be scraped from.
type AuthorWithHTML struct {
// Author is the expected scraping result; its fields are promoted through embedding.
lubimyczytac.Author
// HTML is the raw author page.
HTML []byte
}
// Authors lists the available fixtures: each entry couples an embedded author
// page with the Author value expected from scraping that page.
var Authors = []AuthorWithHTML{
{
Author: lubimyczytac.Author{
ID: "82094",
Name: "Remigiusz Mróz",
ShortDescription: "Polski pisarz, autor powieści kryminalnych oraz cyklu publicystycznego „Kurs pisania”.Ukończył z wyróżnieniem Akademię Leona Koźmińskiego w Warszawie, gd...",
URL: "https://lubimyczytac.pl/autor/82094/remigiusz-mroz",
Books: []lubimyczytac.Book{
{Title: "Obrazy z przeszłości", URL: "https://lubimyczytac.pl/ksiazka/5016998/obrazy-z-przeszlosci"},
{Title: "Skazanie", URL: "https://lubimyczytac.pl/ksiazka/5009453/skazanie"},
{Title: "Behawiorysta", URL: "https://lubimyczytac.pl/ksiazka/5006528/behawiorysta"},
{Title: "Projekt Riese", URL: "https://lubimyczytac.pl/ksiazka/4998407/projekt-riese"},
{Title: "Immunitet", URL: "https://lubimyczytac.pl/ksiazka/4990168/immunitet"},
{Title: "Przepaść", URL: "https://lubimyczytac.pl/ksiazka/4988766/przepasc"},
{Title: "Egzekucja", URL: "https://lubimyczytac.pl/ksiazka/4983192/egzekucja"},
{Title: "Wybaczam ci", URL: "https://lubimyczytac.pl/ksiazka/4975543/wybaczam-ci"},
{Title: "Inwigilacja", URL: "https://lubimyczytac.pl/ksiazka/4968996/inwigilacja"},
{Title: "Ekstremista", URL: "https://lubimyczytac.pl/ksiazka/4968712/ekstremista"},
{Title: "Afekt", URL: "https://lubimyczytac.pl/ksiazka/4962195/afekt"},
{Title: "Głębia osobliwości cz. 2", URL: "https://lubimyczytac.pl/ksiazka/5009924/glebia-osobliwosci-cz-2"},
{Title: "Szepty spoza nicości", URL: "https://lubimyczytac.pl/ksiazka/4955433/szepty-spoza-nicosci"},
{Title: "W cieniu prawa", URL: "https://lubimyczytac.pl/ksiazka/4947287/w-cieniu-prawa"},
{Title: "Księgarenka przy ulicy Wiśniowej", URL: "https://lubimyczytac.pl/ksiazka/4944645/ksiegarenka-przy-ulicy-wisniowej"},
{Title: "Rewizja", URL: "https://lubimyczytac.pl/ksiazka/4943912/rewizja"},
{Title: "Halny", URL: "https://lubimyczytac.pl/ksiazka/4943948/halny"},
{Title: "Precedens", URL: "https://lubimyczytac.pl/ksiazka/4939105/precedens"},
{Title: "Osiedle RZNiW", URL: "https://lubimyczytac.pl/ksiazka/4932748/osiedle-rzniw"},
{Title: "Lot 202", URL: "https://lubimyczytac.pl/ksiazka/4926011/lot-202"},
},
},
HTML: remigiuszMrozHTML,
},
{
Author: lubimyczytac.Author{
ID: "19013",
Name: "John Flanagan",
ShortDescription: "John Flanagan Urodzony i wychowany w Sydney w Australii, John Flanagan od dzieciństwa marzył o tym, by zostać pisarzem. Nie było łatwo. Pracował w agencji r...",
URL: "https://lubimyczytac.pl/autor/19013/john-flanagan",
Books: []lubimyczytac.Book{
{Title: "Morska pogoń", URL: "https://lubimyczytac.pl/ksiazka/5016250/morska-pogon"},
{Title: "Ruiny Gorlanu", URL: "https://lubimyczytac.pl/ksiazka/4989803/ruiny-gorlanu"},
{Title: "Ziemia skuta lodem", URL: "https://lubimyczytac.pl/ksiazka/4989807/ziemia-skuta-lodem"},
{Title: "Bitwa o Skandię", URL: "https://lubimyczytac.pl/ksiazka/4989809/bitwa-o-skandie"},
{Title: "Ucieczka z zamku Falaise", URL: "https://lubimyczytac.pl/ksiazka/4984975/ucieczka-z-zamku-falaise"},
{Title: "Płonący most", URL: "https://lubimyczytac.pl/ksiazka/4989806/plonacy-most"},
{Title: "Zaginiony książę", URL: "https://lubimyczytac.pl/ksiazka/4935503/zaginiony-ksiaze"},
{Title: "Powrót Temudżeinów", URL: "https://lubimyczytac.pl/ksiazka/4897979/powrot-temudzeinow"},
{Title: "Pojedynek w Araluenie", URL: "https://lubimyczytac.pl/ksiazka/4861058/pojedynek-w-araluenie"},
{Title: "Klan Czerwonego Lisa", URL: "https://lubimyczytac.pl/ksiazka/4851290/klan-czerwonego-lisa"},
{Title: "Kaldera", URL: "https://lubimyczytac.pl/ksiazka/4811916/kaldera"},
{Title: "Bitwa na Wrzosowiskach", URL: "https://lubimyczytac.pl/ksiazka/3874675/bitwa-na-wrzosowiskach"},
{Title: "Nieznany ląd", URL: "https://lubimyczytac.pl/ksiazka/303434/nieznany-lad"},
{Title: "Turniej w Gorlanie", URL: "https://lubimyczytac.pl/ksiazka/266058/turniej-w-gorlanie"},
{Title: "Góra Skorpiona", URL: "https://lubimyczytac.pl/ksiazka/230915/gora-skorpiona"},
{Title: "Niewolnicy z Socorro", URL: "https://lubimyczytac.pl/ksiazka/220609/niewolnicy-z-socorro"},
{Title: "Królewski zwiadowca", URL: "https://lubimyczytac.pl/ksiazka/192775/krolewski-zwiadowca"},
{Title: "Pościg", URL: "https://lubimyczytac.pl/ksiazka/167327/poscig"},
{Title: "Najeźdźcy", URL: "https://lubimyczytac.pl/ksiazka/144610/najezdzcy"},
{Title: "Zaginione historie", URL: "https://lubimyczytac.pl/ksiazka/131440/zaginione-historie"},
},
},
HTML: johnFlanaganHTML,
},
}

113
main.go
View File

@ -2,17 +2,42 @@ package main
import (
	"context"
	"encoding/xml"
	"errors"
	"log"
	"net/http"
	"os"
	"os/signal"
	"time"

	"github.com/go-chi/chi/v5"
	"github.com/go-chi/chi/v5/middleware"

	"github.com/Kichiyaki/lubimyczytacrss/internal/lubimyczytac"
)
const (
// defaultClientTimeout is the Timeout of the http.Client that main hands to the lubimyczytac client.
defaultClientTimeout = 5 * time.Second
)
func main() {
r := chi.NewRouter()
r.Use(
middleware.RealIP,
middleware.RequestLogger(&middleware.DefaultLogFormatter{
NoColor: true,
Logger: log.Default(),
}),
middleware.Recoverer,
middleware.Heartbeat("/health"),
)
newHandler(lubimyczytac.NewClient(&http.Client{
Timeout: defaultClientTimeout,
})).register(r)
httpSrv := &http.Server{
Addr: ":9234",
Handler: nil,
Handler: r,
ReadTimeout: 2 * time.Second,
ReadHeaderTimeout: 2 * time.Second,
WriteTimeout: 2 * time.Second,
@ -35,6 +60,90 @@ func main() {
ctxShutdown, cancelCtxShutdown := context.WithTimeout(context.Background(), 10*time.Second)
defer cancelCtxShutdown()
if err := httpSrv.Shutdown(ctxShutdown); err != nil {
log.Fatalln("httpSrv.Shutdown:", err)
log.Println("httpSrv.Shutdown:", err)
}
}
// rssItem is one <item> element of the feed; rssItemsFromBooks creates one per book.
type rssItem struct {
XMLName xml.Name `xml:"item"`
Title string `xml:"title"`
Link string `xml:"link"`
// GUID is filled with the same URL as Link.
GUID string `xml:"guid"`
// Description is left empty by rssItemsFromBooks.
Description string `xml:"description"`
}
// rssItemsFromBooks converts books into feed items, keeping their order. The
// book title becomes the item title and the book URL serves as both link and
// GUID; the description stays empty.
func rssItemsFromBooks(books []lubimyczytac.Book) []rssItem {
	items := make([]rssItem, 0, len(books))
	for _, book := range books {
		var item rssItem
		item.Title = book.Title
		item.Link = book.URL
		item.GUID = book.URL
		items = append(items, item)
	}
	return items
}
// rssChannel is the <channel> element of the feed: the metadata of one author
// followed by one <item> per book.
type rssChannel struct {
	XMLName     xml.Name `xml:"channel"`
	Title       string   `xml:"title"`
	Link        string   `xml:"link"`
	Description string   `xml:"description"`
	// Language is optional; omitempty keeps an unset value from being
	// serialised as an empty <language> element.
	Language string `xml:"language,omitempty"`
	// Items are written as direct <item> children of the channel. The tag
	// matches the element name declared by rssItem.XMLName — the name
	// encoding/xml emits for each entry — so decoding finds them too.
	Items []rssItem `xml:"item"`
}
// rssChannelFromAuthor describes author as a feed channel: name, profile URL
// and short description become title, link and description, and every book
// becomes an item. Language is left unset.
func rssChannelFromAuthor(author lubimyczytac.Author) rssChannel {
	var channel rssChannel
	channel.Title = author.Name
	channel.Link = author.URL
	channel.Description = author.ShortDescription
	channel.Items = rssItemsFromBooks(author.Books)
	return channel
}
// rssMain is the root <rss> element of the feed document.
type rssMain struct {
XMLName xml.Name `xml:"rss"`
// Version is written as the "version" attribute of <rss>.
Version string `xml:"version,attr"`
Channel rssChannel `xml:"channel"`
}
// rssMainFromAuthor wraps the channel built for author in an <rss> root
// element with version "2.0".
func rssMainFromAuthor(author lubimyczytac.Author) rssMain {
	var feed rssMain
	feed.Version = "2.0"
	feed.Channel = rssChannelFromAuthor(author)
	return feed
}
// handler serves the HTTP API of the application.
type handler struct {
// client is used to look authors up.
client *lubimyczytac.Client
}
// newHandler returns a handler that looks authors up through client.
func newHandler(client *lubimyczytac.Client) *handler {
	h := new(handler)
	h.client = client
	return h
}
// register mounts the handler's routes on r.
func (h *handler) register(r chi.Router) {
	const authorFeedPattern = "/api/v1/rss/author/{authorID}"
	r.Get(authorFeedPattern, h.getRSSAuthor)
}
// getRSSAuthor looks up the author named by the {authorID} route parameter
// and writes their books as an RSS 2.0 document.
//
// Responses:
//   - 404 with a plain-text body when the author does not exist;
//   - 500 with the error text for any other lookup failure;
//   - 200 with Content-Type "text/xml; charset=utf-8" and the feed otherwise.
func (h *handler) getRSSAuthor(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	author, err := h.client.GetAuthor(ctx, chi.URLParamFromCtx(ctx, "authorID"))
	switch {
	case errors.Is(err, lubimyczytac.ErrAuthorNotFound):
		// errors.Is keeps matching even if the sentinel gets wrapped on the way up.
		w.WriteHeader(http.StatusNotFound)
		_, _ = w.Write([]byte(`author not found`))
		return
	case err != nil:
		w.WriteHeader(http.StatusInternalServerError)
		_, _ = w.Write([]byte(`something went wrong while getting author info: ` + err.Error()))
		return
	}

	w.Header().Set("Content-Type", "text/xml; charset=utf-8")
	w.WriteHeader(http.StatusOK)
	if err := xml.NewEncoder(w).Encode(rssMainFromAuthor(author)); err != nil {
		// The status line has already been sent, so the failure can only be recorded.
		log.Println("xml.Encoder.Encode:", err)
	}
}