Monorepo for Tangled
tangled.org
1package xrpc
2
3import (
4 "context"
5 "crypto/sha256"
6 "fmt"
7 "io"
8 "net/http"
9 "path/filepath"
10 "slices"
11 "strings"
12
13 "github.com/bluesky-social/indigo/atproto/atclient"
14 "github.com/bluesky-social/indigo/atproto/syntax"
15 "github.com/go-git/go-git/v5/plumbing/object"
16 "tangled.org/core/knotmirror/xrpc/gitea"
17)
18
19func (x *Xrpc) GetBlob(w http.ResponseWriter, r *http.Request) {
20 var (
21 repoQuery = r.URL.Query().Get("repo")
22 ref = r.URL.Query().Get("ref") // ref can be empty (git.Open handles this)
23 path = r.URL.Query().Get("path")
24 )
25
26 repo, err := syntax.ParseDID(repoQuery)
27 if err != nil {
28 writeJson(w, http.StatusBadRequest, atclient.ErrorBody{Name: "BadRequest", Message: fmt.Sprintf("repo parameter invalid: %s", repoQuery)})
29 return
30 }
31
32 l := x.logger.With("method", "git.getBlob", "repo", repo, "ref", ref, "path", path)
33 l.Debug("request")
34
35 if path == "" {
36 writeJson(w, http.StatusBadRequest, atclient.ErrorBody{Name: "BadRequest", Message: "missing path parameter"})
37 return
38 }
39
40 ctx := r.Context()
41
42 repoPath, err := x.makeRepoPath(ctx, repo)
43 if err != nil {
44 writeJson(w, http.StatusNotFound, atclient.ErrorBody{Name: "RepoNotFound", Message: fmt.Sprintf("unknown repository: %s", repo)})
45 return
46 }
47
48 entry, err := x.getFile(ctx, repoPath, ref, path)
49 if err != nil {
50 l.Warn("local mirror failed, trying proxy", "err", err)
51 if x.proxyToKnot(w, r, repo) {
52 return
53 }
54 writeJson(w, http.StatusInternalServerError, atclient.ErrorBody{Name: "InternalServerError", Message: "failed to get blob"})
55 return
56 }
57 size, reader, err := gitea.ReadBlob(ctx, repoPath, entry.Hash)
58 if err != nil {
59 l.Warn("local mirror failed, trying proxy", "err", err)
60 if x.proxyToKnot(w, r, repo) {
61 return
62 }
63 writeJson(w, http.StatusInternalServerError, atclient.ErrorBody{Name: "InternalServerError", Message: "failed to get blob"})
64 return
65 }
66 defer reader.Close()
67
68 // default to octet-stream for large blobs
69 if size > 1000*1000 { // 1MB
70 w.Header().Set("Content-Type", "application/octet-stream")
71 if _, err := io.Copy(w, reader); err != nil {
72 l.Error("failed to serve the blob", "err", err)
73 }
74 return
75 }
76
77 contents, err := io.ReadAll(reader)
78 if err != nil {
79 l.Error("failed to read blob content", "err", err)
80 writeJson(w, http.StatusInternalServerError, atclient.ErrorBody{Name: "InternalServerError", Message: "failed to read the blob"})
81 return
82 }
83
84 mimeType := http.DetectContentType(contents)
85 // override MIME types for formats that http.DetectContentType does not recognize
86 switch filepath.Ext(path) {
87 case ".svg":
88 mimeType = "image/svg+xml"
89 case ".avif":
90 mimeType = "image/avif"
91 case ".jxl":
92 mimeType = "image/jxl"
93 case ".heic", ".heif":
94 mimeType = "image/heif"
95 }
96
97 switch {
98 case strings.HasPrefix(mimeType, "image/"), strings.HasPrefix(mimeType, "video/"):
99 eTag := fmt.Sprintf("\"%x\"", sha256.Sum256(contents))
100 if clientETag := r.Header.Get("If-None-Match"); clientETag == eTag {
101 w.WriteHeader(http.StatusNotModified)
102 return
103 }
104 w.Header().Set("ETag", eTag)
105 w.Header().Set("Content-Type", mimeType)
106
107 case strings.HasPrefix(mimeType, "text/") || isTextualMimeType(mimeType):
108 w.Header().Set("Cache-Control", "public, no-cache")
109 // serve all text content as text/plain
110 w.Header().Set("Content-Type", "text/plain; charset=utf-8")
111
112 default:
113 l.Error("attempted to serve disallowed file type", "mimetype", mimeType)
114 writeJson(w, http.StatusInternalServerError, atclient.ErrorBody{Name: "InvalidRequest", Message: "only image, video, and text files can be accessed directly"})
115 return
116 }
117 w.Write(contents)
118}
119
120func (x *Xrpc) getFile(ctx context.Context, repoPath, ref, path string) (*object.TreeEntry, error) {
121 rev := ref
122 if rev == "" {
123 rev = "HEAD"
124 }
125
126 head, err := gitea.GetCommit(ctx, repoPath, rev)
127 if err != nil {
128 return nil, fmt.Errorf("get head commit: %w", err)
129 }
130
131 treePath := filepath.Dir(path)
132 name := filepath.Base(path)
133
134 // find subTree
135 subRev := head.Hash.String() + "^{tree}"
136 if treePath != "." {
137 subRev = head.Hash.String() + ":" + treePath
138 }
139 subTree, err := gitea.GetTree(ctx, repoPath, subRev)
140 if err != nil {
141 return nil, fmt.Errorf("get subtree %s: %w", subRev, err)
142 }
143
144 // find entry
145 entry, err := func(subTree *object.Tree) (*object.TreeEntry, error) {
146 for _, entry := range subTree.Entries {
147 if entry.Name == name {
148 return &entry, nil
149 }
150 }
151 return nil, fmt.Errorf("object doesn't exist")
152 }(subTree)
153 if err != nil {
154 return nil, fmt.Errorf("get file: %w", err)
155 }
156
157 return entry, nil
158}
159
// textualMimeTypes lists media types outside the "text/" tree whose content
// is nevertheless textual. Entries are lowercase and carry no parameters.
var textualMimeTypes = []string{
	"application/json",
	"application/xml",
	"application/yaml",
	"application/x-yaml",
	"application/toml",
	"application/javascript",
	"application/ecmascript",
}

// isTextualMimeType returns true if the MIME type represents textual content
// that should be served as text/plain for security reasons.
//
// Only the type/subtype part of mimeType is considered: parameters such as
// "; charset=utf-8" are dropped, surrounding whitespace is trimmed and the
// comparison is case-insensitive.
func isTextualMimeType(mimeType string) bool {
	essence, _, _ := strings.Cut(mimeType, ";")
	essence = strings.ToLower(strings.TrimSpace(essence))
	return slices.Contains(textualMimeTypes, essence)
}