Files
scandup/pmscanner/scanner.go
2022-01-07 23:49:37 +02:00

211 lines
4.3 KiB
Go

package pmscanner
import (
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"pmapp/pmhasher"
"pmapp/pmkeyvdb"
)
// Scanner reads files in fixed-size blocks, hashes every block and keeps a
// per-hash occurrence counter in a key-value database.
type Scanner struct {
// bSize is the size in bytes of the read buffer used for each block.
bSize int
// keyvdb stores one entry per block hash; the value is the occurrence
// counter written as a decimal string.
keyvdb *pmkeyvdb.DB
// hasher turns the bytes of a block into the key used in keyvdb.
hasher *pmhasher.Hasher
}
// NewScanner opens the database dbname, builds a hasher from key and returns
// a Scanner that processes files in blocks of bSize bytes.
//
// On failure the returned error is non-nil and the returned Scanner holds no
// open database: if the hasher cannot be created, the database that was just
// opened is closed again instead of being leaked.
func NewScanner(dbname string, key []byte, bSize int) (*Scanner, error) {
	scanner := &Scanner{}

	keyvdb := pmkeyvdb.NewDB()
	if err := keyvdb.Open(dbname); err != nil {
		return scanner, err
	}

	hasher, err := pmhasher.NewHahser(key)
	if err != nil {
		// The database is attached to the scanner only on full success, so
		// it has to be released here or nobody will ever close it.
		keyvdb.Close()
		return scanner, err
	}

	scanner.keyvdb = keyvdb
	scanner.hasher = hasher
	scanner.bSize = bSize
	return scanner, nil
}
// Close closes the scanner's database. It does nothing when the scanner has
// no database attached, which is the state NewScanner leaves behind when
// opening the database fails.
func (this *Scanner) Close() {
	if this == nil || this.keyvdb == nil {
		return
	}
	this.keyvdb.Close()
}
// maxDepth is the depth limit Scan passes to FileTreeScanner.
const maxDepth int = 7

// Scan empties the database and then processes every file found under
// basedir, printing one dot per visited file followed by a newline.
//
// A file that cannot be processed does not stop the scan; the remaining files
// are still visited. The error returned is, in order of precedence: the error
// from cleaning the database, the error that aborted the directory walk, or
// the first per-file error wrapped with the file name.
func (this *Scanner) Scan(basedir string) error {
	if err := this.keyvdb.Clean(); err != nil {
		return err
	}

	var firstFileErr error
	executor := func(filename string) {
		fmt.Print(".")
		if err := this.scanFile(filename); err != nil && firstFileErr == nil {
			firstFileErr = fmt.Errorf("scanning %q: %w", filename, err)
		}
	}

	walkErr := this.FileTreeScanner(basedir, maxDepth, executor)
	fmt.Println()
	if walkErr != nil {
		return walkErr
	}
	return firstFileErr
}
// scanFile reads filename in blocks of bSize bytes, hashes each block and
// increments the counter of the resulting hash via Inc.
//
// Every block except possibly the last one is exactly bSize bytes long; the
// final block holds whatever remains before end of file. An empty file
// produces no blocks.
//
// It returns an error when the database is not open, when the block size is
// not positive, or when opening, reading, hashing or counting fails.
func (this *Scanner) scanFile(filename string) error {
	if this.keyvdb == nil {
		return errors.New("db yet not open")
	}
	// A zero-length buffer would make the read loop spin forever and a
	// negative length would panic in make.
	if this.bSize <= 0 {
		return errors.New("block size must be positive")
	}

	file, err := os.Open(filename)
	if err != nil {
		return err
	}
	defer file.Close()

	// One buffer is reused for all blocks.
	// NOTE(review): assumes Hash does not retain its input slice — confirm.
	buffer := make([]byte, this.bSize)
	for {
		// ReadFull keeps reading until the buffer is full, so a short read
		// from the OS cannot shift the block boundaries mid-file.
		n, readErr := io.ReadFull(file, buffer)
		if readErr == io.EOF {
			// Nothing left to read.
			return nil
		}
		if readErr != nil && readErr != io.ErrUnexpectedEOF {
			return readErr
		}

		hash, err := this.hasher.Hash(buffer[:n])
		if err != nil {
			return err
		}
		if err := this.Inc(hash); err != nil {
			return err
		}

		if readErr == io.ErrUnexpectedEOF {
			// That was the trailing partial block.
			return nil
		}
	}
}
// Print walks the database once and writes a single line to standard output
// with three values: the number of stored keys, the number of duplicate
// occurrences (for every key, its counter minus one) and the second value as
// a percentage of the first. The percentage is 0 when the database is empty.
//
// It returns the iteration error, or an error describing the first stored
// value that is not a decimal counter; in both cases nothing is printed.
func (this *Scanner) Print() error {
	totalCounter := 0
	dupCounter := 0
	var parseErr error

	executor := func(key []byte, val []byte) bool {
		totalCounter++
		counter, err := strconv.Atoi(string(val))
		if err != nil {
			// Remember only the first malformed entry; the callback itself
			// cannot return an error.
			if parseErr == nil {
				parseErr = fmt.Errorf("invalid counter %q for key %x: %w", val, key, err)
			}
			return false
		}
		if counter > 1 {
			dupCounter += counter - 1
		}
		return false
	}

	if err := this.keyvdb.Iter(executor); err != nil {
		return err
	}
	if parseErr != nil {
		return parseErr
	}

	// Avoid 0/0, which would print NaN for an empty database.
	var percent float32
	if totalCounter > 0 {
		percent = (100 * float32(dupCounter)) / float32(totalCounter)
	}
	fmt.Println(totalCounter, dupCounter, percent)
	return nil
}
// Inc adds one to the counter stored under key. A key that is missing, or
// whose stored value is empty, starts at 1. Counters are stored as decimal
// strings.
//
// It returns any error from the database lookup, read or write, and the
// parse error when the stored value is not a decimal integer.
func (this *Scanner) Inc(key []byte) error {
	exists, err := this.keyvdb.Has(key)
	if err != nil {
		return err
	}

	var stored []byte
	if exists {
		if stored, err = this.keyvdb.Get(key); err != nil {
			return err
		}
	}

	count := 1
	if len(stored) != 0 {
		previous, err := strconv.Atoi(string(stored))
		if err != nil {
			return err
		}
		count = previous + 1
	}

	return this.keyvdb.Set(key, []byte(strconv.Itoa(count)))
}
// pathSeparator is the separator of slash-normalised paths; it is used to
// count path components.
const pathSeparator string = "/"

// FileExecutor is the callback FileTreeScanner invokes with the path of every
// non-directory entry it visits.
type FileExecutor = func(filePath string)

// FileTreeScanner walks the tree rooted at basePath and calls executor for
// every non-directory entry that lies at most depth levels below basePath.
// An entry directly inside basePath is at level 1; basePath itself is level 0.
//
// The level is measured relative to basePath, so the limit is the same for
// every spelling of the base, including ".".
//
// The walk stops at the first error reported while visiting an entry and that
// error is returned.
func (this *Scanner) FileTreeScanner(basePath string, depth int, executor FileExecutor) error {
	resolver := func(fullPath string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}

		rel, err := filepath.Rel(basePath, fullPath)
		if err != nil {
			return err
		}
		level := 0
		if rel != "." {
			level = strings.Count(filepath.ToSlash(rel), pathSeparator) + 1
		}

		if level > depth {
			// For a directory this prunes the subtree; for a file it skips
			// the rest of its directory, whose entries are all equally deep.
			return filepath.SkipDir
		}
		if !info.IsDir() {
			executor(fullPath)
		}
		return nil
	}

	return filepath.Walk(basePath, resolver)
}
//EOF