This commit is contained in:
2022-01-07 20:04:12 +02:00
commit fb547b549c
10 changed files with 586 additions and 0 deletions

5
.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
*.bin
*.db
tmp*
*~
pmapp

9
go.mod Normal file
View File

@@ -0,0 +1,9 @@
module pmapp
go 1.16
require (
github.com/minio/highwayhash v1.0.2
github.com/stretchr/testify v1.7.0
github.com/syndtr/goleveldb v1.0.0
)

40
go.sum Normal file
View File

@@ -0,0 +1,40 @@
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db h1:woRePGFeVFfLKN/pOkfl+p/TAqKOfFu+7KPlMVpok/w=
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/minio/highwayhash v1.0.2 h1:Aak5U0nElisjDCfPSG79Tgzkn2gl66NxOMspRrKnA/g=
github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY=
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs=
github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/gomega v1.4.3 h1:RE1xgDvH7imwFD45h+u2SgIfERHlS2yNG4DObb5BSKU=
github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE=
github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd h1:nTDtHvHSdCn1m6ITfMRqtOd/9+7a3s8RBNOZ3eYZzJA=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190130150945-aca44879d564 h1:o6ENHFwwr1TZ9CUPQcfo1HGvLP1OPsPOTB7xCIOPNmU=
golang.org/x/sys v0.0.0-20190130150945-aca44879d564/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
gopkg.in/yaml.v2 v2.2.1 h1:mUhvW9EsL+naU5Q3cakzfE91YhliOondGd6ZrsDBHQE=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

39
pmapp.go Normal file
View File

@@ -0,0 +1,39 @@
//
package main
import (
	"fmt"
	"os"

	"pmapp/pmscanner"
)
// main runs the scan and, when it fails, reports the error on standard error
// and exits with status 1 so that shells and scripts can detect the failure.
func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, "error:", err)
		os.Exit(1)
	}
}
// bSize is the block size, in bytes, passed to the scanner.
const bSize int = 512

// key is the raw key material passed to the scanner for hashing.
var key = []byte("01234567890012345678900123456789012")
// run creates a scanner backed by the "tmpdb" database, scans "data.bin" and
// prints the result. The first error encountered is returned.
func run() error {
	scanner, err := pmscanner.NewScanner("tmpdb", key, bSize)
	if err != nil {
		return err
	}
	// Release the database on every exit path; it was previously left open.
	defer scanner.Close()

	if err := scanner.Scan("data.bin"); err != nil {
		return err
	}
	return scanner.Print()
}
//EOF

36
pmhasher/hasher.go Normal file
View File

@@ -0,0 +1,36 @@
package pmhasher
import (
"errors"
"github.com/minio/highwayhash"
)
// Hasher holds the key used by its Hash method.
type Hasher struct {
	key []byte // private copy of the first keySize bytes of the caller's key
}

// keySize is the number of key bytes a Hasher keeps; extra bytes are ignored.
const keySize int = 32

// NewHahser builds a Hasher from the first keySize bytes of key.
//
// It returns an error when key is shorter than keySize. The key bytes are
// copied, so later modifications of the caller's slice do not change the
// digests produced by the Hasher. As before, a non-nil (but unusable) Hasher
// is returned together with the error.
//
// The spelling of the function name is kept as is because callers depend on it.
func NewHahser(key []byte) (*Hasher, error) {
	hasher := &Hasher{}
	if len(key) < keySize {
		return hasher, errors.New("too short init key")
	}
	// copy stops at len(hasher.key), i.e. exactly keySize bytes are taken.
	hasher.key = make([]byte, keySize)
	copy(hasher.key, key)
	return hasher, nil
}
// Hash returns the HighwayHash digest of data computed with the hasher's key.
// An error is returned when the hash state cannot be created from the key.
func (h *Hasher) Hash(data []byte) ([]byte, error) {
	state, err := highwayhash.New(h.key)
	if err != nil {
		return nil, err
	}
	state.Write(data)
	return state.Sum(nil), nil
}
//EOF

79
pmhasher/hasher_test.go Normal file
View File

@@ -0,0 +1,79 @@
//
package pmhasher
import (
"encoding/hex"
"testing"
"pmapp/tools"
"github.com/stretchr/testify/assert"
)
const (
	// hashHexSample is the expected hex digest for the fixed key and input
	// used by TestHasher01Pre.
	hashHexSample string = "5c5278baa03952257df4db5dcccc2e861dcf211094aac9ae1a3c7fdbc10fe0d3"

	// repeat1 and repeat2 are the iteration counts of the test loops.
	repeat1 int = 64 * 1024
	repeat2 int = 64 * 1024
)
// TestHasher01Pre checks that a fixed key and a fixed input always produce
// the reference digest hashHexSample.
func TestHasher01Pre(t *testing.T) {
	hasher, err := NewHahser([]byte("1234567890123456789012345678901234567890"))
	if err != nil {
		t.Fatal(err)
	}
	for n := 0; n < repeat1; n++ {
		input := []byte("1234567890123456789012345678901234567890")
		digest, err := hasher.Hash(input)
		if err != nil {
			t.Fatal(err)
		}
		assert.Equal(t, hashHexSample, hex.EncodeToString(digest), nil)
	}
}
func TestHasher02Equal(t *testing.T) {
key := tools.RandBytes(32)
hasher, err := NewHahser(key)
if err != nil {
t.Fatal(err)
}
for i := 0; i < repeat1; i++ {
data := tools.RandBytes(1024)
hash1, err := hasher.Hash(data)
if err != nil {
t.Fatal(err)
}
hash2, err := hasher.Hash(data)
if err != nil {
t.Fatal(err)
}
hash1Hex := hex.EncodeToString(hash1)
hash2Hex := hex.EncodeToString(hash2)
assert.Equal(t, hash1Hex, hash2Hex, nil)
}
}
func TestHasher03Rand(t *testing.T) {
key := tools.RandBytes(32)
hasher, err := NewHahser(key)
if err != nil {
t.Fatal(err)
}
data1 := tools.RandBytes(16)
hash1, err := hasher.Hash(data1)
if err != nil {
t.Fatal(err)
}
hash1Hex := hex.EncodeToString(hash1)
for i := 0; i < repeat2; i++ {
data2 := tools.RandBytes(16)
hash2, err := hasher.Hash(data2)
if err != nil {
t.Fatal(err)
}
hash2Hex := hex.EncodeToString(hash2)
assert.NotEqual(t, hash1Hex, hash2Hex, nil)
}
}
//EOF

161
pmkeyvdb/keyvdb.go Normal file
View File

@@ -0,0 +1,161 @@
//
package pmkeyvdb
import (
"bytes"
"errors"
"fmt"
"github.com/syndtr/goleveldb/leveldb"
)
// DB wraps a goleveldb database handle.
type DB struct {
// db is the underlying handle. It is nil on a value returned by NewDB and is
// assigned by Open once the database has been opened.
db *leveldb.DB
}
// NewDB returns a DB with no database attached yet; call Open before using it.
func NewDB() *DB {
	return new(DB)
}
// Open opens the database at filename with leveldb.OpenFile and default
// options. If a database is already attached it is closed first.
//
// On failure the returned error carries the "cannot open database" context
// and wraps the underlying cause, so callers can inspect it with errors.Is or
// errors.As.
func (d *DB) Open(filename string) error {
	if d.db != nil {
		d.Close()
	}
	db, err := leveldb.OpenFile(filename, nil)
	if err != nil {
		return fmt.Errorf("cannot open database: %w", err)
	}
	d.db = db
	return nil
}
// Set stores value under key.
//
// It returns an error when the database has not been opened, or the error
// reported by the underlying Put call. Previously every Put failure was
// reported as "db not yet open", hiding the real cause, while an unopened
// database caused a nil-pointer panic.
func (d *DB) Set(key []byte, value []byte) error {
	if d.db == nil {
		return errors.New("db not yet open")
	}
	return d.db.Put(key, value, nil)
}
// Get returns the value stored under key.
//
// It returns an error when the database has not been opened (matching Has),
// or the error reported by the underlying Get call.
func (d *DB) Get(key []byte) ([]byte, error) {
	if d.db == nil {
		return nil, errors.New("db not yet open")
	}
	return d.db.Get(key, nil)
}
// Has reports whether key is present in the database. It returns an error
// when the database has not been opened or when the lookup itself fails.
func (d *DB) Has(key []byte) (bool, error) {
	if d.db == nil {
		return false, errors.New("db not yet open")
	}
	return d.db.Has(key, nil)
}
// Resolver is a predicate over one key/value pair; it returns true for the
// entries the caller wants to select.
type Resolver = func(key, val []byte) (ok bool)
// First returns the key of the first entry, in iteration order, whose value
// equals sval. The boolean result reports whether such an entry was found.
func (d *DB) First(sval []byte) ([]byte, bool, error) {
	return d.comp(func(_ []byte, val []byte) bool {
		return bytes.Equal(val, sval)
	})
}
// comp walks the database in iteration order and returns the key of the first
// entry accepted by resolver, whether an entry was accepted, and any error
// reported by the iterator.
//
// It returns an error when the database has not been opened.
func (d *DB) comp(resolver Resolver) ([]byte, bool, error) {
	if d.db == nil {
		return nil, false, errors.New("db not yet open")
	}
	iter := d.db.NewIterator(nil, nil)
	defer iter.Release()

	var key []byte
	var ok bool
	for iter.Next() {
		if resolver(iter.Key(), iter.Value()) {
			// The slice returned by iter.Key is owned by the iterator and is
			// not safe to keep once the iterator moves on or is released, so
			// hand the caller a private copy.
			key = append([]byte{}, iter.Key()...)
			ok = true
			break
		}
	}
	return key, ok, iter.Error()
}
// All walks the whole database and returns the keys of every entry accepted
// by resolver, whether at least one entry was accepted, and any error
// reported by the iterator. The returned slice is never nil.
//
// It returns an error when the database has not been opened.
func (d *DB) All(resolver Resolver) ([][]byte, bool, error) {
	collect := make([][]byte, 0)
	if d.db == nil {
		return collect, false, errors.New("db not yet open")
	}
	iter := d.db.NewIterator(nil, nil)
	defer iter.Release()

	var ok bool
	for iter.Next() {
		if resolver(iter.Key(), iter.Value()) {
			// iter.Key returns iterator-owned memory whose contents may change
			// on the next Next call; store a copy so earlier results are not
			// overwritten by later ones.
			collect = append(collect, append([]byte{}, iter.Key()...))
			ok = true
		}
	}
	return collect, ok, iter.Error()
}
// Executor is a callback invoked for one key/value pair during iteration;
// returning true stops the iteration.
type Executor = func(key, val []byte) (stop bool)
// Iter calls executor for every entry in iteration order until executor
// returns true or the entries are exhausted, then returns any error reported
// by the iterator.
//
// The key and value slices passed to executor come straight from the
// iterator; executor should copy them if it needs them after it returns.
//
// It returns an error when the database has not been opened.
func (d *DB) Iter(executor Executor) error {
	if d.db == nil {
		return errors.New("db not yet open")
	}
	iter := d.db.NewIterator(nil, nil)
	defer iter.Release()

	for iter.Next() {
		if executor(iter.Key(), iter.Value()) {
			break
		}
	}
	return iter.Error()
}
// Clean deletes every entry visited by a full iteration over the database.
// It stops at, and returns, the first delete error; otherwise it returns any
// error reported by the iterator.
//
// It returns an error when the database has not been opened.
func (d *DB) Clean() error {
	if d.db == nil {
		return errors.New("db not yet open")
	}
	iter := d.db.NewIterator(nil, nil)
	defer iter.Release()

	for iter.Next() {
		if err := d.db.Delete(iter.Key(), nil); err != nil {
			return err
		}
	}
	return iter.Error()
}
// Close closes the underlying database if one is attached. The error
// returned by the underlying Close call is discarded.
func (d *DB) Close() {
	if d.db == nil {
		return
	}
	d.db.Close()
}
//EOF

63
pmkeyvdb/keyvdb_test.go Normal file
View File

@@ -0,0 +1,63 @@
//
package pmkeyvdb
import (
"testing"
"path/filepath"
"pmapp/tools"
"github.com/stretchr/testify/assert"
)
const (
	// dbname is the database directory name created inside each test's
	// temporary directory.
	dbname string = "tmp.leveldb"

	// repeat is the number of set/get round trips per test.
	repeat int = 1 << 10
)
func TestHasher01SetGetRandKey(t *testing.T) {
var err error
dbpath := filepath.Join(t.TempDir(), dbname)
db := NewDB()
err = db.Open(dbpath)
defer db.Close()
if err != nil {
t.Fatal(err)
}
for i := 0; i < repeat; i++ {
key := tools.RandBytes(16)
ival := tools.RandBytes(128)
err = db.Set(key, ival)
if err != nil {
t.Fatal(err)
}
oval, err := db.Get(key)
if err != nil {
t.Fatal(err)
}
assert.Equal(t, ival, oval, nil)
}
}
func TestHasher02SetGetEqKey(t *testing.T) {
var err error
dbpath := filepath.Join(t.TempDir(), dbname)
db := NewDB()
err = db.Open(dbpath)
defer db.Close()
if err != nil {
t.Fatal(err)
}
key := tools.RandBytes(128)
for i := 0; i < repeat; i++ {
ival := tools.RandBytes(128)
err = db.Set(key, ival)
if err != nil {
t.Fatal(err)
}
oval, err := db.Get(key)
if err != nil {
t.Fatal(err)
}
assert.Equal(t, ival, oval, nil)
}
}
//EOF

130
pmscanner/scanner.go Normal file
View File

@@ -0,0 +1,130 @@
package pmscanner
import (
"errors"
"fmt"
"os"
"io"
"strconv"
"encoding/hex"
"pmapp/pmhasher"
"pmapp/pmkeyvdb"
)
// Scanner reads a file in fixed-size blocks, hashes each block and keeps a
// per-hash occurrence counter in a key/value database.
type Scanner struct {
// bSize is the length, in bytes, of the read buffer used by Scan.
bSize int
// keyvdb stores one decimal counter per block hash.
keyvdb *pmkeyvdb.DB
// hasher computes the hash that is used as the database key for a block.
hasher *pmhasher.Hasher
}
// NewScanner creates a Scanner that hashes blocks of bSize bytes with key and
// counts them in the database at dbname.
//
// It returns an error when bSize is not positive, when the hasher rejects key,
// or when the database cannot be opened. As before, a non-nil Scanner is
// returned together with the error; it holds no open database in that case.
func NewScanner(dbname string, key []byte, bSize int) (*Scanner, error) {
	scanner := &Scanner{}

	// A zero block size would make Scan loop forever and a negative one would
	// panic when the read buffer is allocated, so reject both up front.
	if bSize <= 0 {
		return scanner, errors.New("block size must be positive")
	}

	// Build the hasher before touching the database: it owns no resources, so
	// a bad key can no longer leave an opened database behind.
	hasher, err := pmhasher.NewHahser(key)
	if err != nil {
		return scanner, err
	}

	keyvdb := pmkeyvdb.NewDB()
	if err := keyvdb.Open(dbname); err != nil {
		return scanner, err
	}

	scanner.keyvdb = keyvdb
	scanner.hasher = hasher
	scanner.bSize = bSize
	return scanner, nil
}
// Close closes the scanner's database. It is a no-op when no database is
// attached, which is the case for the Scanner that NewScanner returns
// alongside an error.
func (s *Scanner) Close() {
	if s.keyvdb == nil {
		return
	}
	s.keyvdb.Close()
}
// Scan empties the database, then reads filename in blocks of bSize bytes,
// hashes every block and increments the counter stored under that hash. A
// final block shorter than bSize is hashed as is.
//
// It returns an error when no database is attached, when the block size is
// not positive, or when cleaning, opening, reading, hashing or counting fails.
func (s *Scanner) Scan(filename string) error {
	if s.keyvdb == nil {
		return errors.New("db yet not open")
	}
	// A zero-length buffer would never reach EOF; a negative size would panic.
	if s.bSize <= 0 {
		return errors.New("block size must be positive")
	}
	if err := s.keyvdb.Clean(); err != nil {
		return err
	}

	file, err := os.OpenFile(filename, os.O_RDONLY, 0)
	if err != nil {
		return err
	}
	defer file.Close()

	// One buffer is reused for every block: the hash is computed before the
	// next read overwrites it.
	buffer := make([]byte, s.bSize)
	for {
		// io.ReadFull keeps reading until the buffer is full, so a short read
		// from the OS cannot shift block boundaries. It reports io.EOF when
		// nothing was read and io.ErrUnexpectedEOF for a trailing short block.
		read, readErr := io.ReadFull(file, buffer)
		if readErr != nil && readErr != io.EOF && readErr != io.ErrUnexpectedEOF {
			return readErr
		}
		if read > 0 {
			hash, err := s.hasher.Hash(buffer[:read])
			if err != nil {
				return err
			}
			if err := s.Inc(hash); err != nil {
				return err
			}
		}
		if readErr != nil {
			// End of file reached (with or without a trailing short block).
			return nil
		}
	}
}
// Print writes to standard output one line per stored hash whose counter is
// greater than one, in the form "<hex hash> <counter>".
//
// It returns an error when no database is attached, when iteration fails, or
// when a stored counter is not a valid decimal integer. Such a value used to
// be treated silently as zero; iteration now stops at the first one.
func (s *Scanner) Print() error {
	if s.keyvdb == nil {
		return errors.New("db yet not open")
	}

	var parseErr error
	executor := func(key []byte, val []byte) bool {
		keyHex := hex.EncodeToString(key)
		counter, err := strconv.Atoi(string(val))
		if err != nil {
			parseErr = fmt.Errorf("invalid counter for key %s: %w", keyHex, err)
			return true // stop iterating
		}
		if counter > 1 {
			fmt.Println(keyHex, counter)
		}
		return false
	}

	if err := s.keyvdb.Iter(executor); err != nil {
		return err
	}
	return parseErr
}
// Inc increments the decimal counter stored under key. A key that is absent,
// or whose stored value is empty, gets the counter 1. It returns the first
// error from the database or from parsing the stored counter.
func (s *Scanner) Inc(key []byte) error {
	exists, err := s.keyvdb.Has(key)
	if err != nil {
		return err
	}

	var stored []byte
	if exists {
		if stored, err = s.keyvdb.Get(key); err != nil {
			return err
		}
	}

	count := 1
	if len(stored) != 0 {
		previous, err := strconv.Atoi(string(stored))
		if err != nil {
			return err
		}
		count = previous + 1
	}

	return s.keyvdb.Set(key, []byte(strconv.Itoa(count)))
}
//EOF

24
pmtools/randhex.go Normal file
View File

@@ -0,0 +1,24 @@
//
package pmtools
import (
"encoding/hex"
"time"
"math/rand"
)
// init seeds the shared math/rand generator once per process.
//
// The functions below used to reseed it from the wall clock on every call, so
// two calls within one clock tick produced identical output, and every call
// reset the global generator for all other users of math/rand.
func init() {
	rand.Seed(time.Now().UnixNano())
}

// RandBytesHex returns size pseudo-random bytes encoded as a lowercase hex
// string of length 2*size.
func RandBytesHex(size int) string {
	return hex.EncodeToString(RandBytes(size))
}

// RandBytes returns a new slice of size pseudo-random bytes.
//
// The bytes come from math/rand and are not suitable for cryptographic use.
func RandBytes(size int) []byte {
	randBytes := make([]byte, size)
	// math/rand's Read always fills the slice and returns a nil error.
	rand.Read(randBytes)
	return randBytes
}
//EOF