211 lines
4.3 KiB
Go
211 lines
4.3 KiB
Go
package pmscanner
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"pmapp/pmhasher"
|
|
"pmapp/pmkeyvdb"
|
|
)
|
|
|
|
type Scanner struct {
|
|
bSize int
|
|
keyvdb *pmkeyvdb.DB
|
|
hasher *pmhasher.Hasher
|
|
}
|
|
|
|
func NewScanner(dbname string, key []byte, bSize int) (*Scanner, error) {
|
|
var err error
|
|
scanner := &Scanner{}
|
|
|
|
keyvdb := pmkeyvdb.NewDB()
|
|
err = keyvdb.Open(dbname)
|
|
if err != nil {
|
|
return scanner, err
|
|
}
|
|
scanner.keyvdb = keyvdb
|
|
|
|
hasher, err := pmhasher.NewHahser(key)
|
|
if err != nil {
|
|
return scanner, err
|
|
}
|
|
scanner.hasher = hasher
|
|
scanner.bSize = bSize
|
|
return scanner, err
|
|
}
|
|
|
|
func (this *Scanner) Close() {
|
|
this.keyvdb.Close()
|
|
}
|
|
|
|
const maxDepth int = 7
|
|
|
|
func (this *Scanner) Scan(basedir string) error {
|
|
var err error
|
|
|
|
err = this.keyvdb.Clean()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
executor := func(filename string) {
|
|
fmt.Print(".")
|
|
this.scanFile(filename)
|
|
}
|
|
_ = this.FileTreeScanner(basedir, maxDepth, executor)
|
|
fmt.Println()
|
|
return err
|
|
}
|
|
|
|
func (this *Scanner) scanFile(filename string) error {
|
|
var err error
|
|
|
|
if this.keyvdb == nil {
|
|
return errors.New("db yet not open")
|
|
}
|
|
|
|
file, err := os.OpenFile(filename, os.O_RDONLY, 0)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer file.Close()
|
|
|
|
for {
|
|
buffer := make([]byte, this.bSize)
|
|
read, err := file.Read(buffer);
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
//if read < this.bSize {
|
|
// continue
|
|
//}
|
|
hash, err := this.hasher.Hash(buffer[:read])
|
|
if err != nil {
|
|
return err
|
|
}
|
|
err = this.Inc(hash)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return err
|
|
}
|
|
|
|
func (this *Scanner) Print() error {
|
|
var err error
|
|
dupSize := 0
|
|
dupCounter := 0
|
|
executor := func(key []byte, val []byte) bool {
|
|
counter, _ := strconv.Atoi(string(val))
|
|
if counter > 1 {
|
|
dupSize += (counter - 1) * this.bSize
|
|
dupCounter += (counter - 1)
|
|
}
|
|
return false
|
|
}
|
|
err = this.keyvdb.Iter(executor)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
totalCounter := 0
|
|
executor = func(key []byte, val []byte) bool {
|
|
totalCounter++
|
|
return false
|
|
}
|
|
err = this.keyvdb.Iter(executor)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
fmt.Println(totalCounter, dupCounter, (100 * float32(dupCounter))/float32(totalCounter))
|
|
//totalSize := totalCounter * this.bSize
|
|
//fmt.Println(totalSize, dupSize, (100 * float32(dupSize))/float32(totalSize))
|
|
|
|
return err
|
|
}
|
|
|
|
func (this *Scanner) Inc(key []byte) error {
|
|
var err error
|
|
var val []byte
|
|
has, err := this.keyvdb.Has(key)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if has {
|
|
val, err = this.keyvdb.Get(key)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
var counter int
|
|
switch {
|
|
case len(val) == 0:
|
|
counter = 1
|
|
default:
|
|
counter, err = strconv.Atoi(string(val))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
counter++
|
|
}
|
|
nval := []byte(strconv.Itoa(counter))
|
|
err = this.keyvdb.Set(key, nval)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return err
|
|
}
|
|
|
|
const pathSeparator string = "/"
|
|
|
|
type FileExecutor = func(filePath string)
|
|
|
|
func (this *Scanner) FileTreeScanner(basePath string, depth int, executor FileExecutor) error {
|
|
|
|
pathLength := func(path string) int {
|
|
if len(path) == 0 {
|
|
return 0
|
|
}
|
|
path = filepath.Clean(path)
|
|
path = filepath.ToSlash(path)
|
|
path = strings.Trim(path, pathSeparator)
|
|
if len(path) == 0 {
|
|
return 0
|
|
}
|
|
list := strings.Split(path, pathSeparator)
|
|
return len(list)
|
|
}
|
|
|
|
depth = depth + pathLength(basePath)
|
|
|
|
resolver := func(fullPath string, info os.FileInfo, err error) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if pathLength(fullPath) > depth {
|
|
return filepath.SkipDir
|
|
}
|
|
if !info.IsDir(){
|
|
executor(fullPath)
|
|
}
|
|
return nil
|
|
}
|
|
err := filepath.Walk(basePath, resolver)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return err
|
|
}
|
|
|
|
|
|
|
|
//EOF
|