commit d3cd46f5c898bc3ed80f6235e9e473330463968c
Author: Julian Daube
Date:   Sun Mar 12 14:47:59 2017 +0100

    initial commit

diff --git a/cmd/apiserver/main.go b/cmd/apiserver/main.go
new file mode 100644
index 0000000..6fcd64c
--- /dev/null
+++ b/cmd/apiserver/main.go
@@ -0,0 +1,293 @@
+// Command apiserver is a simple request server exposing a small HTTP
+// API to search reichelt parts and to retrieve the price, product image
+// and metadata of a part.
+package main
+
+import (
+	"encoding/json"
+	"flag"
+	"fmt"
+	"image"
+	"image/jpeg"
+	"image/png"
+	"log"
+	"net/http"
+	"net/url"
+	"strconv"
+	"strings"
+	"time"
+
+	cache "github.com/patrickmn/go-cache"
+
+	"go.dedaa.de/julixau/reichelt"
+)
+
+var (
+	addr = flag.String("http", ":8080", "The Address to bind to ")
+)
+
+// Handler serves the API. It embeds the reichelt connection used for
+// all upstream requests; a nil cache disables caching.
+type Handler struct {
+	*reichelt.Connection
+
+	cache *cache.Cache
+}
+
+// NotFound answers the request with status 404 and the body "404".
+func NotFound(resp http.ResponseWriter) {
+	resp.WriteHeader(http.StatusNotFound)
+	fmt.Fprint(resp, "404")
+}
+
+// InternalError answers the request with status 500 and the body "500".
+func InternalError(resp http.ResponseWriter) {
+	resp.WriteHeader(http.StatusInternalServerError)
+	fmt.Fprint(resp, "500")
+}
+
+// writeJSON sends v as JSON response body. Encoding errors can only be
+// logged, since the status line may already have been written.
+func writeJSON(resp http.ResponseWriter, v interface{}) {
+	resp.Header().Set("Content-Type", "application/json")
+	if err := json.NewEncoder(resp).Encode(v); err != nil {
+		log.Println("could not encode response:", err)
+	}
+}
+
+// partNumber parses the first path element as a part number. If path is
+// empty or the element is not a number, it answers with 404 and returns
+// false; the caller must then stop handling the request.
+func partNumber(resp http.ResponseWriter, path []string) (int, bool) {
+	if len(path) == 0 {
+		NotFound(resp)
+		return 0, false
+	}
+	log.Println("level 2 request:", path)
+
+	number, err := strconv.Atoi(path[0])
+	if err != nil {
+		log.Println("encountered decode error:", err)
+		NotFound(resp)
+		return 0, false
+	}
+	return number, true
+}
+
+// Search handles /search/<query>. It unescapes the query, looks it up
+// with FindPart and answers with the found parts as JSON.
+func (h Handler) Search(resp http.ResponseWriter, path []string) {
+	if len(path) == 0 {
+		NotFound(resp)
+		return
+	}
+	log.Println("level 2 request:", path)
+
+	query, err := url.PathUnescape(path[0])
+	if err != nil {
+		log.Println("illegal query:", path[0], ":", err)
+		NotFound(resp)
+		return
+	}
+
+	parts, err := h.FindPart(query)
+	if err != nil {
+		log.Println("error retrieving part:", err)
+		InternalError(resp)
+		// Stop here, otherwise a JSON body is appended to the 500.
+		return
+	}
+
+	// insert to cache
+	if h.cache != nil {
+		for i := range parts {
+			// Point at the slice element instead of the loop variable,
+			// so that every entry refers to its own part.
+			h.cache.Set(strconv.Itoa(parts[i].Number), &parts[i], cache.NoExpiration)
+		}
+	}
+
+	writeJSON(resp, parts)
+}
+
+// Picture handles /image/<number>. It answers with the product image
+// re-encoded as PNG; decoded images are cached without expiration.
+func (h Handler) Picture(resp http.ResponseWriter, path []string) {
+	number, ok := partNumber(resp, path)
+	if !ok {
+		return
+	}
+
+	serve := func(img image.Image) {
+		resp.Header().Set("Content-type", "image/png")
+		resp.WriteHeader(http.StatusOK)
+		if err := png.Encode(resp, img); err != nil {
+			log.Println("could not encode png:", err)
+		}
+	}
+
+	key := path[0] + "-image"
+	if h.cache != nil {
+		if x, found := h.cache.Get(key); found {
+			if img, isImage := x.(image.Image); isImage {
+				serve(img)
+				return
+			}
+		}
+	}
+
+	body, err := h.GetImage(reichelt.Part{Number: number}, 99999, 9999)
+	if err != nil {
+		log.Println("error retrieving picture:", err)
+		InternalError(resp)
+		return
+	}
+	// Release the upstream connection once the image is decoded.
+	defer body.Close()
+
+	decodedImg, err := jpeg.Decode(body)
+	if err != nil {
+		log.Println("could no decode image:", err)
+		InternalError(resp)
+		return
+	}
+	if h.cache != nil {
+		h.cache.Set(key, decodedImg, cache.NoExpiration)
+	}
+	serve(decodedImg)
+}
+
+// Price handles /price/<number>. It answers with the price as a JSON
+// number; prices are cached for 30 seconds.
+func (h Handler) Price(resp http.ResponseWriter, path []string) {
+	number, ok := partNumber(resp, path)
+	if !ok {
+		return
+	}
+
+	key := path[0] + "-price"
+	if h.cache != nil {
+		if x, found := h.cache.Get(key); found {
+			if price, isPrice := x.(float32); isPrice {
+				writeJSON(resp, price)
+				return
+			}
+		}
+	}
+
+	price := h.GetPrice(reichelt.Part{Number: number})
+	if h.cache != nil {
+		h.cache.Set(key, price, time.Second*30)
+	}
+	writeJSON(resp, price)
+}
+
+// Meta handles /meta/<number>[/<selector>]. Without selector it answers
+// with all metadata of the part, with the selector "overview" (in any
+// case) with the list of headlines, and with any other selector with
+// the entries stored under that headline.
+func (h Handler) Meta(resp http.ResponseWriter, path []string) {
+	number, ok := partNumber(resp, path)
+	if !ok {
+		return
+	}
+
+	meta, ok := h.lookupMeta(path[0], number)
+	if !ok {
+		InternalError(resp)
+		return
+	}
+
+	if len(path) < 2 {
+		writeJSON(resp, meta)
+		return
+	}
+
+	if strings.ToLower(path[1]) == "overview" {
+		var headlines []string
+		for k := range meta {
+			headlines = append(headlines, k)
+		}
+		writeJSON(resp, headlines)
+		return
+	}
+
+	query, err := url.QueryUnescape(path[1])
+	if err != nil {
+		log.Println("illegal query:", path[1])
+		NotFound(resp)
+		return
+	}
+
+	// query is more concrete
+	subset, ok := meta[query]
+	if !ok {
+		NotFound(resp)
+		return
+	}
+	writeJSON(resp, subset)
+}
+
+// lookupMeta returns the metadata of the part, from the cache if
+// possible to avoid many queries to the reichelt server. Failed lookups
+// are logged and not cached, so an error is not served forever.
+func (h Handler) lookupMeta(key string, number int) (reichelt.Meta, bool) {
+	key += "-meta"
+	if h.cache != nil {
+		if x, found := h.cache.Get(key); found {
+			if meta, isMeta := x.(reichelt.Meta); isMeta {
+				return meta, true
+			}
+		}
+	}
+
+	meta, err := h.GetMeta(reichelt.Part{Number: number})
+	if err != nil {
+		log.Println("encountered error:", err)
+		return nil, false
+	}
+	if h.cache != nil {
+		h.cache.Set(key, meta, cache.NoExpiration)
+	}
+	return meta, true
+}
+
+// ServeHTTP dispatches on the first path element to the search, image,
+// price and meta handlers; anything else is answered with 404.
+func (h Handler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+	// Split the escaped path: the handlers unescape their elements
+	// themselves, so the elements must still be escaped here.
+	p := strings.Split(req.URL.EscapedPath(), "/")
+	if len(p) < 2 {
+		NotFound(resp)
+		return
+	}
+	p = p[1:]
+	log.Println("level 1 request:", p)
+
+	switch p[0] {
+	case "search":
+		h.Search(resp, p[1:])
+	case "image":
+		h.Picture(resp, p[1:])
+	case "price":
+		h.Price(resp, p[1:])
+	case "meta":
+		h.Meta(resp, p[1:])
+	default:
+		NotFound(resp)
+	}
+}
+
+// main parses the flags, connects to reichelt and serves the API on
+// the configured address.
+func main() {
+	flag.Parse()
+	conn, err := reichelt.NewConnection()
+	if err != nil {
+		log.Fatal("could not create connection to reichelt:", err)
+	}
+
+	log.Println("start serving on:", *addr)
+	log.Fatal(http.ListenAndServe(*addr, Handler{conn, cache.New(cache.NoExpiration, 0)}))
+}
diff --git a/connection.go b/connection.go
new file mode 100644
index 0000000..51d4b72
--- /dev/null
+++ b/connection.go
@@ -0,0 +1,52 @@
+package reichelt
+
+import (
+	"fmt"
+	"net/http"
+	"net/http/cookiejar"
+	"time"
+)
+
+// requestTimeout bounds every request to reichelt, so that a stalled
+// server cannot block a caller forever.
+const requestTimeout = 30 * time.Second
+
+// Connection is a session with the reichelt web site. Create it with
+// NewConnection.
+type Connection struct {
+	// queryCount counts the FindPart requests. It is updated with
+	// sync/atomic and kept first in the struct for 64-bit alignment.
+	queryCount int64
+
+	client http.Client
+}
+
+// NewConnection creates a Connection with a cookie jar and requests
+// the start page once, so that the site can set its session cookie.
+func NewConnection() (c *Connection, err error) {
+	jar, err := cookiejar.New(nil)
+	if err != nil {
+		return nil, err
+	}
+
+	c = &Connection{
+		client: http.Client{
+			Jar:     jar,
+			Timeout: requestTimeout,
+		},
+	}
+
+	// get reichelt SID cookie set
+	resp, err := c.client.Get(apiurl)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		// resp.Status already contains the numeric code.
+		return nil, fmt.Errorf("unexpected response status: %s", resp.Status)
+	}
+
+	return c, nil
+}
diff --git a/connection_test.go b/connection_test.go
new file mode 100644
index 0000000..f7de20a
--- /dev/null
+++ b/connection_test.go
@@ -0,0 +1,83 @@
+package reichelt
+
+import (
+	"image/jpeg"
+	"net/url"
+	"testing"
+)
+
+// conn is the connection shared by the tests; SetupConnection sets it.
+var conn *Connection
+
+// SetupConnection creates the shared connection and verifies that the
+// site has set its session cookie.
+func SetupConnection(t *testing.T) {
+	c, err := NewConnection()
+	if err != nil {
+		t.Fatal("create Connection:", err)
+	}
+
+	// look for cookie
+	u, err := url.Parse(apiurl)
+	if err != nil {
+		t.Fatal("parse api url:", err)
+	}
+	found := false
+	for _, k := range c.client.Jar.Cookies(u) {
+		t.Log("cookie set:", k)
+		if k.Name == "Reichelt_SID" {
+			found = true
+			break
+		}
+	}
+
+	if !found {
+		t.Fatal("connection did not get cookie")
+	}
+
+	t.Log("connection created successfully")
+	conn = c
+}
+
+// TestPart searches a part and fetches its price and product image.
+// It talks to the live reichelt site.
+func TestPart(t *testing.T) {
+	// Without a connection the calls below would dereference nil.
+	if !t.Run("createConnection", SetupConnection) {
+		t.Fatal("no connection")
+	}
+
+	// get part
+	parts, err := conn.FindPart("1N4001")
+	if err != nil {
+		t.Fatal("find part:", err)
+	}
+
+	if len(parts) == 0 {
+		t.Fatal("not enough parts were retrieved")
+	}
+
+	t.Log(parts)
+
+	// get prices
+	p := conn.GetPrice(parts[0])
+	if p == 0 {
+		t.Fatal("get Price")
+	}
+
+	t.Log(parts[0], ":", p)
+
+	// get image for part
+	imgReader, err := conn.GetImage(parts[0], 1000, 1000)
+	if err != nil {
+		t.Fatal("get product image:", err)
+	}
+	defer imgReader.Close()
+	img, err := jpeg.Decode(imgReader)
+
+	if err != nil {
+		t.Fatal("jpg decode:", err)
+	}
+
+	t.Log("image size:", img.Bounds())
+}
diff --git a/meta.go b/meta.go
new file mode 100644
index 0000000..d128b92
--- /dev/null
+++ b/meta.go
@@ -0,0 +1,78 @@
+package reichelt
+
+import (
+	"fmt"
+	"net/http"
+	"strconv"
+	"strings"
+
+	"github.com/andybalholm/cascadia"
+
+	"golang.org/x/net/html"
+)
+
+// Meta holds the metadata of a part: headline -> property name ->
+// property value.
+type Meta map[string]map[string]string
+
+var (
+	metaSelector          = cascadia.MustCompile(".av_propview")
+	metaItemNameSelector  = cascadia.MustCompile(".av_propname")
+	metaItemValueSelector = cascadia.MustCompile(".av_propvalue")
+)
+
+// GetMeta returns the metadata connected to the specified part, scraped
+// from its article page. The result is nil if the page contains no
+// metadata section.
+func (c *Connection) GetMeta(p Part) (Meta, error) {
+	resp, err := c.client.Get(apiurl + "?ACTION=3&ARTICLE=" + strconv.Itoa(p.Number))
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	// Do not scrape error pages, like FindPart and GetPrice.
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("wrong response status: %d", resp.StatusCode)
+	}
+
+	doc, err := html.Parse(resp.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	nodes := metaSelector.MatchAll(doc)
+	if len(nodes) == 0 {
+		return nil, nil
+	}
+
+	result := make(Meta, len(nodes))
+	for _, n := range nodes {
+		// The headline is the text of the first grandchild.
+		if n.FirstChild == nil || n.FirstChild.FirstChild == nil {
+			continue
+		}
+		headline := n.FirstChild.FirstChild.Data
+
+		names := metaItemNameSelector.MatchAll(n)
+		values := metaItemValueSelector.MatchAll(n)
+		// Names and values are paired by position, so an uneven section
+		// cannot be interpreted.
+		if len(names) != len(values) {
+			continue
+		}
+
+		data := make(map[string]string, len(names))
+		for i := range names {
+			if names[i].FirstChild == nil || values[i].FirstChild == nil {
+				continue
+			}
+
+			data[names[i].FirstChild.Data] = strings.Trim(values[i].FirstChild.Data, " ")
+		}
+
+		result[headline] = data
+	}
+
+	return result, nil
+}
diff --git a/part.go b/part.go
new file mode 100644
index 0000000..24b333f
--- /dev/null
+++ b/part.go
@@ -0,0 +1,112 @@
+package reichelt
+
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strconv"
+	"strings"
+	"sync/atomic"
+
+	"github.com/andybalholm/cascadia"
+
+	"golang.org/x/net/html"
+)
+
+// Part is an article as returned by the search of the site.
+type Part struct {
+	// Number is the internal article number.
+	Number int `json:"article_artid"`
+
+	Description string `json:"article_lang_besch"`
+}
+
+const apiurl = "https://www.reichelt.de/index.html"
+
+// ResponseField is the "response" object of a search response.
+type ResponseField struct {
+	NumFound int     `json:"numFound"`
+	MaxScore float32 `json:"maxScore"`
+
+	Docs []Part `json:"docs"`
+}
+
+// SearchResponse is the JSON document returned by the search.
+type SearchResponse struct {
+	Response ResponseField `json:"response"`
+}
+
+var (
+	priceSelector = cascadia.MustCompile("#av_price")
+)
+
+// FindPart searches for a part using the search engine of the site.
+// It can be used to resolve part numbers to internal ones.
+func (c *Connection) FindPart(query string) ([]Part, error) {
+	// The term is a query parameter, so it needs query escaping: path
+	// escaping leaves "&", "=" and "+" untouched.
+	resp, err := c.client.Get(apiurl + "?ACTION=514&id=8&term=" + url.QueryEscape(query))
+	// Handlers may search concurrently, so count atomically.
+	atomic.AddInt64(&c.queryCount, 1)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("wrong response status: %d", resp.StatusCode)
+	}
+
+	response := SearchResponse{}
+	if err = json.NewDecoder(resp.Body).Decode(&response); err != nil {
+		return nil, err
+	}
+
+	return response.Response.Docs, nil
+}
+
+// GetPrice returns the price of the part, or 0 if there was an error.
+func (c *Connection) GetPrice(p Part) float32 {
+	resp, err := c.client.Get(apiurl + "?ACTION=3&ARTICLE=" + strconv.Itoa(p.Number))
+	if err != nil {
+		return 0
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return 0
+	}
+
+	doc, err := html.Parse(resp.Body)
+	if err != nil {
+		return 0
+	}
+
+	// The inner text of the price tag is stored as its first child.
+	priceTag := priceSelector.MatchFirst(doc)
+	if priceTag == nil || priceTag.FirstChild == nil {
+		return 0
+	}
+
+	return parsePrice(priceTag.FirstChild.Data)
+}
+
+// parsePrice converts a price text like "1,23 €" to a number. It
+// returns 0 if the text has no "€" sign or no valid number before it.
+func parsePrice(text string) float32 {
+	// split before € sign
+	i := strings.Index(text, "€")
+	if i < 0 {
+		return 0
+	}
+
+	// need to convert german decimals (using ,) to american decimals
+	// (using .)
+	str := strings.Replace(strings.TrimSpace(text[:i]), ",", ".", 1)
+	ret, err := strconv.ParseFloat(str, 32)
+	if err != nil {
+		return 0
+	}
+	return float32(ret)
+}
diff --git a/picture.go b/picture.go
new file mode 100644
index 0000000..1a2f440
--- /dev/null
+++ b/picture.go
@@ -0,0 +1,27 @@
+package reichelt
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"strconv"
+)
+
+// GetImage gets the product image of a reichelt article using the
+// internal part number. w and h are sent as the resize dimensions. On
+// success the reader returns an image/jpg file and must be closed by
+// the caller.
+func (c *Connection) GetImage(p Part, w, h uint) (io.ReadCloser, error) {
+	size := strconv.FormatUint(uint64(w), 10) + "x" + strconv.FormatUint(uint64(h), 10)
+	resp, err := c.client.Get("https://www.reichelt.de/artimage/resize_" + size + "/" + strconv.Itoa(p.Number))
+	if err != nil {
+		return nil, err
+	}
+
+	// Do not hand an error page to the caller as image data.
+	if resp.StatusCode != http.StatusOK {
+		resp.Body.Close()
+		return nil, fmt.Errorf("wrong response status: %d", resp.StatusCode)
+	}
+
+	return resp.Body, nil
+}