commit fb547b549c91d554bcc6d846b56fe48667bb2693 Author: Oleg Borodin Date: Fri Jan 7 20:04:12 2022 +0200 import diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b626845 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.bin +*.db +tmp* +*~ +pmapp diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..4b5c441 --- /dev/null +++ b/go.mod @@ -0,0 +1,9 @@ +module pmapp + +go 1.16 + +require ( + github.com/minio/highwayhash v1.0.2 + github.com/stretchr/testify v1.7.0 + github.com/syndtr/goleveldb v1.0.0 +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..33f9914 --- /dev/null +++ b/go.sum @@ -0,0 +1,40 @@ +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db h1:woRePGFeVFfLKN/pOkfl+p/TAqKOfFu+7KPlMVpok/w= +github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/minio/highwayhash v1.0.2 h1:Aak5U0nElisjDCfPSG79Tgzkn2gl66NxOMspRrKnA/g= +github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs= +github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/gomega v1.4.3 h1:RE1xgDvH7imwFD45h+u2SgIfERHlS2yNG4DObb5BSKU= +github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= 
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE= +github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd h1:nTDtHvHSdCn1m6ITfMRqtOd/9+7a3s8RBNOZ3eYZzJA= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190130150945-aca44879d564 h1:o6ENHFwwr1TZ9CUPQcfo1HGvLP1OPsPOTB7xCIOPNmU= +golang.org/x/sys v0.0.0-20190130150945-aca44879d564/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= 
+gopkg.in/yaml.v2 v2.2.1 h1:mUhvW9EsL+naU5Q3cakzfE91YhliOondGd6ZrsDBHQE=
+gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/pmapp.go b/pmapp.go
new file mode 100644
index 0000000..a5da8f2
--- /dev/null
+++ b/pmapp.go
@@ -0,0 +1,39 @@
+// Command pmapp scans a data file in fixed-size blocks and prints the
+// hashes of the blocks that occur more than once.
+package main
+
+import (
+	"fmt"
+	"os"
+
+	"pmapp/pmscanner"
+)
+
+const bSize int = 512
+
+// key seeds the block hasher; only its first 32 bytes are used.
+var key = []byte("01234567890012345678900123456789012")
+
+func main() {
+	if err := run(); err != nil {
+		fmt.Fprintln(os.Stderr, "error:", err)
+		os.Exit(1)
+	}
+}
+
+// run opens the scanner database, scans data.bin and prints the blocks that
+// were seen more than once.
+func run() error {
+	scanner, err := pmscanner.NewScanner("tmpdb", key, bSize)
+	if err != nil {
+		return err
+	}
+	// Release the database even when scanning or printing fails.
+	defer scanner.Close()
+
+	if err := scanner.Scan("data.bin"); err != nil {
+		return err
+	}
+	return scanner.Print()
+}
+//EOF
diff --git a/pmhasher/hasher.go b/pmhasher/hasher.go
new file mode 100644
index 0000000..ce61aae
--- /dev/null
+++ b/pmhasher/hasher.go
@@ -0,0 +1,47 @@
+// Package pmhasher computes keyed HighwayHash digests of byte blocks.
+package pmhasher
+
+import (
+	"errors"
+
+	"github.com/minio/highwayhash"
+)
+
+// keySize is the number of key bytes handed to HighwayHash.
+const keySize int = 32
+
+// Hasher hashes data with a fixed key.
+type Hasher struct {
+	key []byte
+}
+
+// NewHasher returns a Hasher keyed with the first keySize bytes of key.
+// It fails when key is shorter than keySize. The key bytes are copied, so
+// the caller may reuse or modify key afterwards.
+func NewHasher(key []byte) (*Hasher, error) {
+	if len(key) < keySize {
+		return nil, errors.New("too short init key")
+	}
+	own := make([]byte, keySize)
+	copy(own, key)
+	return &Hasher{key: own}, nil
+}
+
+// NewHahser is the original, misspelled name of NewHasher.
+//
+// Deprecated: use NewHasher instead.
+func NewHahser(key []byte) (*Hasher, error) {
+	return NewHasher(key)
+}
+
+// Hash returns the digest of data computed with the Hasher's key.
+func (h *Hasher) Hash(data []byte) ([]byte, error) {
+	hash, err := highwayhash.New(h.key)
+	if err != nil {
+		return nil, err
+	}
+	// The hash.Hash contract states that Write never returns an error.
+	hash.Write(data)
+	return hash.Sum(nil), nil
+}
+//EOF
diff --git a/pmhasher/hasher_test.go b/pmhasher/hasher_test.go
new file mode 100644
index 0000000..6a4d174
--- /dev/null
+++ b/pmhasher/hasher_test.go
@@ -0,0 +1,82 @@
+// Tests for the keyed block hasher.
+package pmhasher
+
+import (
+	"encoding/hex"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"pmapp/pmtools"
+)
+
+const hashHexSample string = "5c5278baa03952257df4db5dcccc2e861dcf211094aac9ae1a3c7fdbc10fe0d3"
+const repeat1 int = 1024 * 64
+const repeat2 int = 1024 * 64
+
+// TestHasher01Pre checks the digest of a fixed input against a known sample.
+func TestHasher01Pre(t *testing.T) {
+	key := []byte("1234567890123456789012345678901234567890")
+	hasher, err := NewHasher(key)
+	if err != nil {
+		t.Fatal(err)
+	}
+	for i := 0; i < repeat1; i++ {
+		data := []byte("1234567890123456789012345678901234567890")
+		hash, err := hasher.Hash(data)
+		if err != nil {
+			t.Fatal(err)
+		}
+		hashHex := hex.EncodeToString(hash)
+		assert.Equal(t, hashHexSample, hashHex)
+	}
+}
+
+// TestHasher02Equal checks that hashing the same data twice gives equal digests.
+func TestHasher02Equal(t *testing.T) {
+	key := pmtools.RandBytes(32)
+	hasher, err := NewHasher(key)
+	if err != nil {
+		t.Fatal(err)
+	}
+	for i := 0; i < repeat1; i++ {
+		data := pmtools.RandBytes(1024)
+		hash1, err := hasher.Hash(data)
+		if err != nil {
+			t.Fatal(err)
+		}
+		hash2, err := hasher.Hash(data)
+		if err != nil {
+			t.Fatal(err)
+		}
+		hash1Hex := hex.EncodeToString(hash1)
+		hash2Hex := hex.EncodeToString(hash2)
+		assert.Equal(t, hash1Hex, hash2Hex)
+	}
+}
+
+// TestHasher03Rand checks that separately generated random inputs do not collide.
+func TestHasher03Rand(t *testing.T) {
+	key := pmtools.RandBytes(32)
+	hasher, err := NewHasher(key)
+	if err != nil {
+		t.Fatal(err)
+	}
+	data1 := pmtools.RandBytes(16)
+	hash1, err := hasher.Hash(data1)
+	if err != nil {
+		t.Fatal(err)
+	}
+	hash1Hex := hex.EncodeToString(hash1)
+
+	for i := 0; i < repeat2; i++ {
+		data2 := pmtools.RandBytes(16)
+		hash2, err := hasher.Hash(data2)
+		if err != nil {
+			t.Fatal(err)
+		}
+		hash2Hex := hex.EncodeToString(hash2)
+		assert.NotEqual(t, hash1Hex, hash2Hex)
+	}
+}
+//EOF
diff --git a/pmkeyvdb/keyvdb.go b/pmkeyvdb/keyvdb.go
new file mode 100644
index 0000000..da5e8cf
--- /dev/null
+++ b/pmkeyvdb/keyvdb.go
@@ -0,0 +1,161 @@
+// Package pmkeyvdb wraps a goleveldb database behind a small key-value API.
+package pmkeyvdb
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"github.com/syndtr/goleveldb/leveldb"
+)
+
+// DB is a handle to one LevelDB database; it is unusable until Open succeeds.
+type DB struct {
+	db *leveldb.DB
+}
+
+// NewDB returns a DB that has not been opened yet.
+func NewDB() *DB { return &DB{} }
+
+// errNotOpen is returned by every operation attempted before Open succeeds.
+var errNotOpen = errors.New("db not yet open")
+
+// Open opens the LevelDB database stored at filename. A database previously
+// opened through this DB is closed first.
+func (d *DB) Open(filename string) error {
+	d.Close()
+	db, err := leveldb.OpenFile(filename, nil)
+	if err != nil {
+		return fmt.Errorf("cannot open database: %w", err)
+	}
+	d.db = db
+	return nil
+}
+
+// Set stores value under key, replacing any value already stored there.
+// It fails with an error when the database is not open.
+func (d *DB) Set(key []byte, value []byte) error {
+	if d.db == nil {
+		return errNotOpen
+	}
+	return d.db.Put(key, value, nil)
+}
+
+// Get returns the value stored under key.
+// It fails with an error when the database is not open.
+func (d *DB) Get(key []byte) ([]byte, error) {
+	if d.db == nil {
+		return nil, errNotOpen
+	}
+	return d.db.Get(key, nil)
+}
+
+// Has reports whether key is present in the database.
+// It fails with an error when the database is not open.
+func (d *DB) Has(key []byte) (bool, error) {
+	if d.db == nil {
+		return false, errNotOpen
+	}
+	return d.db.Has(key, nil)
+}
+
+// Resolver is a predicate over one key/value pair; it returns true when the
+// pair matches.
+type Resolver = func(key []byte, val []byte) (ok bool)
+
+// First returns the first key, in iteration order, whose value equals sval.
+// The boolean result is false when no such key exists.
+func (d *DB) First(sval []byte) ([]byte, bool, error) {
+	return d.comp(func(key []byte, val []byte) bool {
+		return bytes.Equal(val, sval)
+	})
+}
+
+// comp returns a copy of the first key accepted by resolver and whether one
+// was found.
+func (d *DB) comp(resolver Resolver) ([]byte, bool, error) {
+	if d.db == nil {
+		return nil, false, errNotOpen
+	}
+	var key []byte
+	var ok bool
+
+	iter := d.db.NewIterator(nil, nil)
+	defer iter.Release()
+	for iter.Next() {
+		if resolver(iter.Key(), iter.Value()) {
+			// The iterator may reuse its key buffer, so detach the result.
+			key = append([]byte(nil), iter.Key()...)
+			ok = true
+			break
+		}
+	}
+	return key, ok, iter.Error()
+}
+
+// All returns copies of every key accepted by resolver. The boolean result
+// is true when at least one key matched.
+func (d *DB) All(resolver Resolver) ([][]byte, bool, error) {
+	collect := make([][]byte, 0)
+	if d.db == nil {
+		return collect, false, errNotOpen
+	}
+
+	iter := d.db.NewIterator(nil, nil)
+	defer iter.Release()
+	for iter.Next() {
+		if resolver(iter.Key(), iter.Value()) {
+			// Keys must be copied: the iterator may overwrite the slice it
+			// returned as soon as Next is called again.
+			collect = append(collect, append([]byte(nil), iter.Key()...))
+		}
+	}
+	return collect, len(collect) > 0, iter.Error()
+}
+
+// Executor is called for each key/value pair during Iter; returning true
+// stops the iteration.
+type Executor = func(key []byte, val []byte) (stop bool)
+
+// Iter calls executor for every pair in the database until it returns true
+// or the pairs are exhausted.
+func (d *DB) Iter(executor Executor) error {
+	if d.db == nil {
+		return errNotOpen
+	}
+	iter := d.db.NewIterator(nil, nil)
+	defer iter.Release()
+	for iter.Next() {
+		if executor(iter.Key(), iter.Value()) {
+			break
+		}
+	}
+	return iter.Error()
+}
+
+// Clean deletes every key currently stored in the database.
+// The database itself stays open.
+func (d *DB) Clean() error {
+	if d.db == nil {
+		return errNotOpen
+	}
+	iter := d.db.NewIterator(nil, nil)
+	defer iter.Release()
+	for iter.Next() {
+		// NOTE(review): presumably safe because the iterator reads a snapshot; confirm.
+		if err := d.db.Delete(iter.Key(), nil); err != nil {
+			return err
+		}
+	}
+	return iter.Error()
+}
+
+// Close closes the database if it is open; the DB may then be reopened
+// with Open.
+func (d *DB) Close() {
+	if d.db != nil {
+		// Close has no error result, so a close failure cannot be reported.
+		_ = d.db.Close()
+		d.db = nil
+	}
+}
+//EOF
diff --git a/pmkeyvdb/keyvdb_test.go b/pmkeyvdb/keyvdb_test.go
new file mode 100644
index 0000000..41c2732
--- /dev/null
+++ b/pmkeyvdb/keyvdb_test.go
@@ -0,0 +1,63 @@
+// Tests for the LevelDB-backed key-value store.
+package pmkeyvdb
+
+import (
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"pmapp/pmtools"
+)
+
+const dbname string = "tmp.leveldb"
+const repeat int = 1024
+
+// TestHasher01SetGetRandKey stores values under random keys and reads each
+// one back.
+func TestHasher01SetGetRandKey(t *testing.T) {
+	dbpath := filepath.Join(t.TempDir(), dbname)
+	db := NewDB()
+	if err := db.Open(dbpath); err != nil {
+		t.Fatal(err)
+	}
+	defer db.Close()
+
+	for i := 0; i < repeat; i++ {
+		key := pmtools.RandBytes(16)
+		ival := pmtools.RandBytes(128)
+		if err := db.Set(key, ival); err != nil {
+			t.Fatal(err)
+		}
+		oval, err := db.Get(key)
+		if err != nil {
+			t.Fatal(err)
+		}
+		assert.Equal(t, ival, oval)
+	}
+}
+
+// TestHasher02SetGetEqKey overwrites a single key repeatedly and checks
+// that the latest value is the one read back.
+func TestHasher02SetGetEqKey(t *testing.T) {
+	dbpath := filepath.Join(t.TempDir(), dbname)
+	db := NewDB()
+	if err := db.Open(dbpath); err != nil {
+		t.Fatal(err)
+	}
+	defer db.Close()
+
+	key := pmtools.RandBytes(128)
+	for i := 0; i < repeat; i++ {
+		ival := pmtools.RandBytes(128)
+		if err := db.Set(key, ival); err != nil {
+			t.Fatal(err)
+		}
+		oval, err := db.Get(key)
+		if err != nil {
+			t.Fatal(err)
+		}
+		assert.Equal(t, ival, oval)
+	}
+}
+//EOF
diff --git a/pmscanner/scanner.go b/pmscanner/scanner.go
new file mode 100644
index 0000000..391974a
--- /dev/null
+++ b/pmscanner/scanner.go
@@ -0,0 +1,146 @@
+// Package pmscanner counts how often each fixed-size block of a file occurs.
+package pmscanner
+
+import (
+	"bufio"
+	"encoding/hex"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"strconv"
+
+	"pmapp/pmhasher"
+	"pmapp/pmkeyvdb"
+)
+
+// Scanner hashes the blocks of a file and keeps a per-hash occurrence
+// counter in a key-value database.
+type Scanner struct {
+	bSize  int
+	keyvdb *pmkeyvdb.DB
+	hasher *pmhasher.Hasher
+}
+
+// NewScanner prepares a hasher keyed with key and opens the counter
+// database dbname. bSize is the block size in bytes and must be positive.
+func NewScanner(dbname string, key []byte, bSize int) (*Scanner, error) {
+	if bSize <= 0 {
+		return nil, errors.New("block size must be positive")
+	}
+	// Build the hasher first so that a bad key cannot leave the database
+	// open with nobody left to close it.
+	hasher, err := pmhasher.NewHahser(key)
+	if err != nil {
+		return nil, err
+	}
+	keyvdb := pmkeyvdb.NewDB()
+	if err := keyvdb.Open(dbname); err != nil {
+		return nil, err
+	}
+	return &Scanner{bSize: bSize, keyvdb: keyvdb, hasher: hasher}, nil
+}
+
+// Close releases the counter database.
+func (s *Scanner) Close() {
+	if s.keyvdb != nil {
+		s.keyvdb.Close()
+	}
+}
+
+// Scan resets the counters, then reads filename block by block and
+// increments the counter of every block hash. The last block may be
+// shorter than the block size.
+func (s *Scanner) Scan(filename string) error {
+	if s.keyvdb == nil {
+		return errors.New("db yet not open")
+	}
+	if s.bSize <= 0 {
+		// A zero-length buffer would never reach EOF.
+		return errors.New("block size must be positive")
+	}
+	if err := s.keyvdb.Clean(); err != nil {
+		return err
+	}
+
+	file, err := os.Open(filename)
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+
+	reader := bufio.NewReader(file)
+	buffer := make([]byte, s.bSize)
+	for {
+		// ReadFull keeps blocks aligned even when a single Read is short.
+		read, rerr := io.ReadFull(reader, buffer)
+		if rerr == io.EOF {
+			return nil
+		}
+		if rerr != nil && rerr != io.ErrUnexpectedEOF {
+			return rerr
+		}
+		hash, err := s.hasher.Hash(buffer[:read])
+		if err != nil {
+			return err
+		}
+		if err := s.Inc(hash); err != nil {
+			return err
+		}
+		if rerr == io.ErrUnexpectedEOF {
+			// Short final block: nothing is left to read.
+			return nil
+		}
+	}
+}
+
+// Print writes to standard output the hex-encoded hash and the counter of
+// every block that was seen more than once.
+func (s *Scanner) Print() error {
+	if s.keyvdb == nil {
+		return errors.New("db yet not open")
+	}
+	var badCounter error
+	err := s.keyvdb.Iter(func(key []byte, val []byte) bool {
+		keyHex := hex.EncodeToString(key)
+		counter, err := strconv.Atoi(string(val))
+		if err != nil {
+			// Stop at the first corrupt counter instead of skipping it.
+			badCounter = fmt.Errorf("bad counter for %s: %w", keyHex, err)
+			return true
+		}
+		if counter > 1 {
+			fmt.Println(keyHex, counter)
+		}
+		return false
+	})
+	if err != nil {
+		return err
+	}
+	return badCounter
+}
+
+// Inc increments the counter stored under key, starting at 1 for a key that
+// has not been seen yet.
+func (s *Scanner) Inc(key []byte) error {
+	has, err := s.keyvdb.Has(key)
+	if err != nil {
+		return err
+	}
+	counter := 0
+	if has {
+		val, err := s.keyvdb.Get(key)
+		if err != nil {
+			return err
+		}
+		// An empty stored value is treated like a missing counter.
+		if len(val) > 0 {
+			counter, err = strconv.Atoi(string(val))
+			if err != nil {
+				return err
+			}
+		}
+	}
+	return s.keyvdb.Set(key, []byte(strconv.Itoa(counter+1)))
+}
+//EOF
diff --git a/pmtools/randhex.go b/pmtools/randhex.go
new file mode 100644
index 0000000..1718fc5
--- /dev/null
+++ b/pmtools/randhex.go
@@ -0,0 +1,32 @@
+// Package pmtools provides helpers that generate pseudo-random test data.
+package pmtools
+
+import (
+	"encoding/hex"
+	"math/rand"
+	"sync"
+	"time"
+)
+
+// seedOnce guards the one-time seeding of the shared math/rand source.
+var seedOnce sync.Once
+
+// RandBytesHex returns size pseudo-random bytes encoded as a hex string.
+func RandBytesHex(size int) string {
+	return hex.EncodeToString(RandBytes(size))
+}
+
+// RandBytes returns size pseudo-random bytes drawn from math/rand; they
+// must not be used as secrets.
+func RandBytes(size int) []byte {
+	// Seed once: reseeding from the clock on every call makes calls that
+	// land on the same clock tick return identical bytes.
+	seedOnce.Do(func() {
+		rand.Seed(time.Now().UnixNano())
+	})
+	randBytes := make([]byte, size)
+	// math/rand's Read always fills the slice and never returns an error.
+	rand.Read(randBytes)
+	return randBytes
+}
+//EOF