pax_global_header00006660000000000000000000000064132735356500014523gustar00rootroot0000000000000052 comment=e2ffdb16a802fe2bb95e2e35ff34f0e53aeef34f text-0.1.0/000077500000000000000000000000001327353565000125055ustar00rootroot00000000000000text-0.1.0/License000066400000000000000000000020341327353565000140110ustar00rootroot00000000000000Copyright 2012 Keith Rarick Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. text-0.1.0/Readme000066400000000000000000000001771327353565000136320ustar00rootroot00000000000000This is a Go package for manipulating paragraphs of text. See http://go.pkgdoc.org/github.com/kr/text for full documentation. text-0.1.0/cmd/000077500000000000000000000000001327353565000132505ustar00rootroot00000000000000text-0.1.0/cmd/agg/000077500000000000000000000000001327353565000140065ustar00rootroot00000000000000text-0.1.0/cmd/agg/doc.go000066400000000000000000000045071327353565000151100ustar00rootroot00000000000000/* Agg computes aggregate values over tabular text. It behaves somewhat like the SQL “GROUP BY” clause. Usage: agg [function...] It reads input from stdin as a sequence of records, one per line. It treats each line as a set of fields separated by white space. One field (the first, by default) is designated as the key. Successive lines with equal keys are grouped into a group, and agg produces one line of output for each group. (Note that only contiguous input lines can form a group. If you need to make sure that all records for a given key are grouped together, sort the input first.) For each remaining field, agg applies a function to all the values in the group, producing a single output value. The command line arguments specify which functions to use, one per field in the input table. Functions The available functions are: key group by this field (default for field 1) first value from first line of group (default for rest) last value from last line of group sample value from any line of group, uniformly at random prefix longest common string prefix join:sep concatenate strings with given sep smin lexically least string smax lexically greatest string min numerically least value max numerically greatest value sum numeric sum mean arithmetic mean count number of records (ignores input value) const:val print val, ignoring input drop omit the column entirely The numeric functions skip items that don't parse as numbers. Examples Using the following input: $ cat >input -rwx alice 100 /home/alice/bin/crdt -rw- alice 210002 /home/alice/thesis.tex -rw- bob 10051 /home/bob/expenses.tab -rwx kr 862060 /home/kr/bin/blog -rwx kr 304608 /home/kr/bin/agg Disk usage for each user, plus where that disk usage occurs (longest common prefix of filesystem paths): $ agg = 0 { sym, argmap[i] = sym[:p], sym[p+1:] } if sym == "key" { key, sym = i, "first" } f, ok := symtab[sym] if !ok { log.Fatalf("bad function: %q", sym) } funcmap[i] = f } sc := bufio.NewScanner(os.Stdin) var g *group for sc.Scan() { ss := strings.Fields(sc.Text()) if !matches(g, ss) { emit(g) g = &group{key: ss[key]} } mergeLine(g, ss) } emit(g) } type group struct { key string agg []agg } func matches(g *group, ss []string) bool { return g != nil && g.key == ss[key] } func emit(g *group) { if g == nil { return } rest := false for i, a := range g.agg { if f, ok := funcmap[i]; ok && f == nil { continue } if rest { fmt.Print("\t") } rest = true fmt.Print(a) } fmt.Println() } func mergeLine(g *group, ss []string) { for i, s := range ss { if i >= len(g.agg) { f := funcmap[i] if f == nil { f = first } g.agg = append(g.agg, f(s, argmap[i])) } else { g.agg[i].merge(s) } } } text-0.1.0/cmd/agg/num.go000066400000000000000000000033231327353565000151350ustar00rootroot00000000000000package main import ( "math/big" "strconv" ) func min(s, arg string) agg { return newBinop(s, opmin) } func max(s, arg string) agg { return newBinop(s, opmax) } func sum(s, arg string) agg { return newBinop(s, opsum) } type binop struct { v *big.Float f func(a, b *big.Float) *big.Float } func newBinop(s string, f func(a, b *big.Float) *big.Float) *binop { v, _ := parseFloat(s) return &binop{v, f} } func (o *binop) String() string { if o.v == nil { return "NaN" } return o.v.Text('f', -1) } func (o *binop) merge(s string) { v, ok := parseFloat(s) if !ok { return } o.v = o.f(o.v, v) } func opmin(a, b *big.Float) *big.Float { if a != nil && (b == nil || a.Cmp(b) <= 0) { return a } return b } func opmax(a, b *big.Float) *big.Float { if a != nil && (b == nil || a.Cmp(b) >= 0) { return a } return b } func opsum(a, b *big.Float) *big.Float { if a == nil { return b } else if b == nil { return a } return a.Add(a, b) } type meanagg struct { v *big.Float d float64 // actually an integer } func mean(s, arg string) agg { v, ok := parseFloat(s) if !ok { return &meanagg{new(big.Float), 0} } return &meanagg{v, 1} } func (m *meanagg) String() string { if m.d == 0 { return "NaN" } v := new(big.Float).Quo(m.v, big.NewFloat(m.d)) return v.Text('f', -1) } func (m *meanagg) merge(s string) { v, ok := parseFloat(s) if !ok { return } m.v.Add(m.v, v) m.d++ } func parseFloat(s string) (*big.Float, bool) { v, _, err := big.ParseFloat(s, 0, 1000, big.ToNearestEven) return v, err == nil } type counter int func count(init, arg string) agg { return new(counter) } func (c *counter) String() string { return strconv.Itoa(int(*c) + 1) } func (c *counter) merge(string) { *c++ } text-0.1.0/cmd/agg/string.go000066400000000000000000000030351327353565000156440ustar00rootroot00000000000000package main import ( "math/rand" "strings" ) func first(s, arg string) agg { return &sbinop{s, opfirst} } func last(s, arg string) agg { return &sbinop{s, oplast} } func prefix(s, arg string) agg { return &sbinop{s, opprefix} } func join(s, arg string) agg { return &sbinop{s, opjoin(arg)} } func smin(s, arg string) agg { return &sbinop{s, opsmin} } func smax(s, arg string) agg { return &sbinop{s, opsmax} } type sbinop struct { s string f func(a, b string) string } func (o *sbinop) String() string { return o.s } func (o *sbinop) merge(s string) { o.s = o.f(o.s, s) } func opfirst(a, b string) string { return a } func oplast(a, b string) string { return b } func opprefix(a, b string) string { for i := range a { if i >= len(b) || a[i] != b[i] { return a[:i] } } return a } func opjoin(sep string) func(a, b string) string { return func(a, b string) string { return a + sep + b // TODO(kr): too slow? maybe strings.Join? } } func opsmin(a, b string) string { if strings.Compare(a, b) <= 0 { return a } return b } func opsmax(a, b string) string { if strings.Compare(a, b) >= 0 { return a } return b } type sampler struct { n int s string } func sample(s, arg string) agg { return &sampler{1, s} } func (p *sampler) String() string { return p.s } func (p *sampler) merge(s string) { p.n++ if rand.Intn(p.n) == 0 { p.s = s } } type constant string func constf(init, arg string) agg { return constant(arg) } func (c constant) String() string { return string(c) } func (c constant) merge(string) {} text-0.1.0/colwriter/000077500000000000000000000000001327353565000145175ustar00rootroot00000000000000text-0.1.0/colwriter/Readme000066400000000000000000000002711327353565000156370ustar00rootroot00000000000000Package colwriter provides a write filter that formats input lines in multiple columns. The package is a straightforward translation from /src/cmd/draw/mc.c in Plan 9 from User Space. text-0.1.0/colwriter/column.go000066400000000000000000000063431327353565000163510ustar00rootroot00000000000000// Package colwriter provides a write filter that formats // input lines in multiple columns. // // The package is a straightforward translation from // /src/cmd/draw/mc.c in Plan 9 from User Space. package colwriter import ( "bytes" "io" "unicode/utf8" ) const ( tab = 4 ) const ( // Print each input line ending in a colon ':' separately. BreakOnColon uint = 1 << iota ) // A Writer is a filter that arranges input lines in as many columns as will // fit in its width. Tab '\t' chars in the input are translated to sequences // of spaces ending at multiples of 4 positions. // // If BreakOnColon is set, each input line ending in a colon ':' is written // separately. // // The Writer assumes that all Unicode code points have the same width; this // may not be true in some fonts. type Writer struct { w io.Writer buf []byte width int flag uint } // NewWriter allocates and initializes a new Writer writing to w. // Parameter width controls the total number of characters on each line // across all columns. func NewWriter(w io.Writer, width int, flag uint) *Writer { return &Writer{ w: w, width: width, flag: flag, } } // Write writes p to the writer w. The only errors returned are ones // encountered while writing to the underlying output stream. func (w *Writer) Write(p []byte) (n int, err error) { var linelen int var lastWasColon bool for i, c := range p { w.buf = append(w.buf, c) linelen++ if c == '\t' { w.buf[len(w.buf)-1] = ' ' for linelen%tab != 0 { w.buf = append(w.buf, ' ') linelen++ } } if w.flag&BreakOnColon != 0 && c == ':' { lastWasColon = true } else if lastWasColon { if c == '\n' { pos := bytes.LastIndex(w.buf[:len(w.buf)-1], []byte{'\n'}) if pos < 0 { pos = 0 } line := w.buf[pos:] w.buf = w.buf[:pos] if err = w.columnate(); err != nil { if len(line) < i { return i - len(line), err } return 0, err } if n, err := w.w.Write(line); err != nil { if r := len(line) - n; r < i { return i - r, err } return 0, err } } lastWasColon = false } if c == '\n' { linelen = 0 } } return len(p), nil } // Flush should be called after the last call to Write to ensure that any data // buffered in the Writer is written to output. func (w *Writer) Flush() error { return w.columnate() } func (w *Writer) columnate() error { words := bytes.Split(w.buf, []byte{'\n'}) w.buf = nil if len(words[len(words)-1]) == 0 { words = words[:len(words)-1] } maxwidth := 0 for _, wd := range words { if n := utf8.RuneCount(wd); n > maxwidth { maxwidth = n } } maxwidth++ // space char wordsPerLine := w.width / maxwidth if wordsPerLine <= 0 { wordsPerLine = 1 } nlines := (len(words) + wordsPerLine - 1) / wordsPerLine for i := 0; i < nlines; i++ { col := 0 endcol := 0 for j := i; j < len(words); j += nlines { endcol += maxwidth _, err := w.w.Write(words[j]) if err != nil { return err } col += utf8.RuneCount(words[j]) if j+nlines < len(words) { for col < endcol { _, err := w.w.Write([]byte{' '}) if err != nil { return err } col++ } } } _, err := w.w.Write([]byte{'\n'}) if err != nil { return err } } return nil } text-0.1.0/colwriter/column_test.go000066400000000000000000000030201327353565000173750ustar00rootroot00000000000000package colwriter import ( "bytes" "testing" ) var src = ` .git .gitignore .godir Procfile: README.md api.go apps.go auth.go darwin.go data.go dyno.go: env.go git.go help.go hkdist linux.go ls.go main.go plugin.go run.go scale.go ssh.go tail.go term unix.go update.go version.go windows.go `[1:] var tests = []struct { wid int flag uint src string want string }{ {80, 0, "", ""}, {80, 0, src, ` .git README.md darwin.go git.go ls.go scale.go unix.go .gitignore api.go data.go help.go main.go ssh.go update.go .godir apps.go dyno.go: hkdist plugin.go tail.go version.go Procfile: auth.go env.go linux.go run.go term windows.go `[1:]}, {80, BreakOnColon, src, ` .git .gitignore .godir Procfile: README.md api.go apps.go auth.go darwin.go data.go dyno.go: env.go hkdist main.go scale.go term version.go git.go linux.go plugin.go ssh.go unix.go windows.go help.go ls.go run.go tail.go update.go `[1:]}, {20, 0, ` Hello Γειά σου 안녕 今日は `[1:], ` Hello 안녕 Γειά σου 今日は `[1:]}, } func TestWriter(t *testing.T) { for _, test := range tests { b := new(bytes.Buffer) w := NewWriter(b, test.wid, test.flag) if _, err := w.Write([]byte(test.src)); err != nil { t.Error(err) } if err := w.Flush(); err != nil { t.Error(err) } if g := b.String(); test.want != g { t.Log("\n" + test.want) t.Log("\n" + g) t.Errorf("%q != %q", test.want, g) } } } text-0.1.0/doc.go000066400000000000000000000001441327353565000136000ustar00rootroot00000000000000// Package text provides rudimentary functions for manipulating text in // paragraphs. package text text-0.1.0/go.mod000066400000000000000000000001001327353565000136020ustar00rootroot00000000000000module "github.com/kr/text" require "github.com/kr/pty" v1.1.1 text-0.1.0/indent.go000066400000000000000000000030011327353565000143070ustar00rootroot00000000000000package text import ( "io" ) // Indent inserts prefix at the beginning of each non-empty line of s. The // end-of-line marker is NL. func Indent(s, prefix string) string { return string(IndentBytes([]byte(s), []byte(prefix))) } // IndentBytes inserts prefix at the beginning of each non-empty line of b. // The end-of-line marker is NL. func IndentBytes(b, prefix []byte) []byte { var res []byte bol := true for _, c := range b { if bol && c != '\n' { res = append(res, prefix...) } res = append(res, c) bol = c == '\n' } return res } // Writer indents each line of its input. type indentWriter struct { w io.Writer bol bool pre [][]byte sel int off int } // NewIndentWriter makes a new write filter that indents the input // lines. Each line is prefixed in order with the corresponding // element of pre. If there are more lines than elements, the last // element of pre is repeated for each subsequent line. func NewIndentWriter(w io.Writer, pre ...[]byte) io.Writer { return &indentWriter{ w: w, pre: pre, bol: true, } } // The only errors returned are from the underlying indentWriter. func (w *indentWriter) Write(p []byte) (n int, err error) { for _, c := range p { if w.bol { var i int i, err = w.w.Write(w.pre[w.sel][w.off:]) w.off += i if err != nil { return n, err } } _, err = w.w.Write([]byte{c}) if err != nil { return n, err } n++ w.bol = c == '\n' if w.bol { w.off = 0 if w.sel < len(w.pre)-1 { w.sel++ } } } return n, nil } text-0.1.0/indent_test.go000066400000000000000000000036241327353565000153610ustar00rootroot00000000000000package text import ( "bytes" "testing" ) type T struct { inp, exp, pre string } var tests = []T{ { "The quick brown fox\njumps over the lazy\ndog.\nBut not quickly.\n", "xxxThe quick brown fox\nxxxjumps over the lazy\nxxxdog.\nxxxBut not quickly.\n", "xxx", }, { "The quick brown fox\njumps over the lazy\ndog.\n\nBut not quickly.", "xxxThe quick brown fox\nxxxjumps over the lazy\nxxxdog.\n\nxxxBut not quickly.", "xxx", }, } func TestIndent(t *testing.T) { for _, test := range tests { got := Indent(test.inp, test.pre) if got != test.exp { t.Errorf("mismatch %q != %q", got, test.exp) } } } type IndentWriterTest struct { inp, exp string pre []string } var ts = []IndentWriterTest{ { ` The quick brown fox jumps over the lazy dog. But not quickly. `[1:], ` xxxThe quick brown fox xxxjumps over the lazy xxxdog. xxxBut not quickly. `[1:], []string{"xxx"}, }, { ` The quick brown fox jumps over the lazy dog. But not quickly. `[1:], ` xxaThe quick brown fox xxxjumps over the lazy xxxdog. xxxBut not quickly. `[1:], []string{"xxa", "xxx"}, }, { ` The quick brown fox jumps over the lazy dog. But not quickly. `[1:], ` xxaThe quick brown fox xxbjumps over the lazy xxcdog. xxxBut not quickly. `[1:], []string{"xxa", "xxb", "xxc", "xxx"}, }, { ` The quick brown fox jumps over the lazy dog. But not quickly.`[1:], ` xxaThe quick brown fox xxxjumps over the lazy xxxdog. xxx xxxBut not quickly.`[1:], []string{"xxa", "xxx"}, }, } func TestIndentWriter(t *testing.T) { for _, test := range ts { b := new(bytes.Buffer) pre := make([][]byte, len(test.pre)) for i := range test.pre { pre[i] = []byte(test.pre[i]) } w := NewIndentWriter(b, pre...) if _, err := w.Write([]byte(test.inp)); err != nil { t.Error(err) } if got := b.String(); got != test.exp { t.Errorf("mismatch %q != %q", got, test.exp) t.Log(got) t.Log(test.exp) } } } text-0.1.0/mc/000077500000000000000000000000001327353565000131045ustar00rootroot00000000000000text-0.1.0/mc/Readme000066400000000000000000000005241327353565000142250ustar00rootroot00000000000000Command mc prints in multiple columns. Usage: mc [-] [-N] [file...] Mc splits the input into as many columns as will fit in N print positions. If the output is a tty, the default N is the number of characters in a terminal line; otherwise the default N is 80. Under option - each input line ending in a colon ':' is printed separately. text-0.1.0/mc/mc.go000066400000000000000000000023361327353565000140360ustar00rootroot00000000000000// Command mc prints in multiple columns. // // Usage: mc [-] [-N] [file...] // // Mc splits the input into as many columns as will fit in N // print positions. If the output is a tty, the default N is // the number of characters in a terminal line; otherwise the // default N is 80. Under option - each input line ending in // a colon ':' is printed separately. package main import ( "github.com/kr/pty" "github.com/kr/text/colwriter" "io" "log" "os" "strconv" ) func main() { var width int var flag uint args := os.Args[1:] for len(args) > 0 && len(args[0]) > 0 && args[0][0] == '-' { if len(args[0]) > 1 { width, _ = strconv.Atoi(args[0][1:]) } else { flag |= colwriter.BreakOnColon } args = args[1:] } if width < 1 { _, width, _ = pty.Getsize(os.Stdout) } if width < 1 { width = 80 } w := colwriter.NewWriter(os.Stdout, width, flag) if len(args) > 0 { for _, s := range args { if f, err := os.Open(s); err == nil { copyin(w, f) f.Close() } else { log.Println(err) } } } else { copyin(w, os.Stdin) } } func copyin(w *colwriter.Writer, r io.Reader) { if _, err := io.Copy(w, r); err != nil { log.Println(err) } if err := w.Flush(); err != nil { log.Println(err) } } text-0.1.0/wrap.go000066400000000000000000000042611327353565000140100ustar00rootroot00000000000000package text import ( "bytes" "math" ) var ( nl = []byte{'\n'} sp = []byte{' '} ) const defaultPenalty = 1e5 // Wrap wraps s into a paragraph of lines of length lim, with minimal // raggedness. func Wrap(s string, lim int) string { return string(WrapBytes([]byte(s), lim)) } // WrapBytes wraps b into a paragraph of lines of length lim, with minimal // raggedness. func WrapBytes(b []byte, lim int) []byte { words := bytes.Split(bytes.Replace(bytes.TrimSpace(b), nl, sp, -1), sp) var lines [][]byte for _, line := range WrapWords(words, 1, lim, defaultPenalty) { lines = append(lines, bytes.Join(line, sp)) } return bytes.Join(lines, nl) } // WrapWords is the low-level line-breaking algorithm, useful if you need more // control over the details of the text wrapping process. For most uses, either // Wrap or WrapBytes will be sufficient and more convenient. // // WrapWords splits a list of words into lines with minimal "raggedness", // treating each byte as one unit, accounting for spc units between adjacent // words on each line, and attempting to limit lines to lim units. Raggedness // is the total error over all lines, where error is the square of the // difference of the length of the line and lim. Too-long lines (which only // happen when a single word is longer than lim units) have pen penalty units // added to the error. func WrapWords(words [][]byte, spc, lim, pen int) [][][]byte { n := len(words) length := make([][]int, n) for i := 0; i < n; i++ { length[i] = make([]int, n) length[i][i] = len(words[i]) for j := i + 1; j < n; j++ { length[i][j] = length[i][j-1] + spc + len(words[j]) } } nbrk := make([]int, n) cost := make([]int, n) for i := range cost { cost[i] = math.MaxInt32 } for i := n - 1; i >= 0; i-- { if length[i][n-1] <= lim || i == n-1 { cost[i] = 0 nbrk[i] = n } else { for j := i + 1; j < n; j++ { d := lim - length[i][j-1] c := d*d + cost[j] if length[i][j-1] > lim { c += pen // too-long lines get a worse penalty } if c < cost[i] { cost[i] = c nbrk[i] = j } } } } var lines [][][]byte i := 0 for i < n { lines = append(lines, words[i:nbrk[i]]) i = nbrk[i] } return lines } text-0.1.0/wrap_test.go000066400000000000000000000022311327353565000150420ustar00rootroot00000000000000package text import ( "bytes" "testing" ) var text = "The quick brown fox jumps over the lazy dog." func TestWrap(t *testing.T) { exp := [][]string{ {"The", "quick", "brown", "fox"}, {"jumps", "over", "the", "lazy", "dog."}, } words := bytes.Split([]byte(text), sp) got := WrapWords(words, 1, 24, defaultPenalty) if len(exp) != len(got) { t.Fail() } for i := range exp { if len(exp[i]) != len(got[i]) { t.Fail() } for j := range exp[i] { if exp[i][j] != string(got[i][j]) { t.Fatal(i, exp[i][j], got[i][j]) } } } } func TestWrapNarrow(t *testing.T) { exp := "The\nquick\nbrown\nfox\njumps\nover\nthe\nlazy\ndog." if Wrap(text, 5) != exp { t.Fail() } } func TestWrapOneLine(t *testing.T) { exp := "The quick brown fox jumps over the lazy dog." if Wrap(text, 500) != exp { t.Fail() } } func TestWrapBug1(t *testing.T) { cases := []struct { limit int text string want string }{ {4, "aaaaa", "aaaaa"}, {4, "a aaaaa", "a\naaaaa"}, } for _, test := range cases { got := Wrap(test.text, test.limit) if got != test.want { t.Errorf("Wrap(%q, %d) = %q want %q", test.text, test.limit, got, test.want) } } }