backblaze-backup/internal/services/backblaze/duplicates.go

152 lines
3.2 KiB
Go

package backblaze
import (
"context"
"errors"
"fmt"
"log"
"strings"
"sync"
"github.com/kurin/blazer/b2"
"golang.org/x/sync/semaphore"
)
// duplicate records an object name that was seen more than once while
// listing a bucket.
type duplicate struct {
	// bucket is the name of the bucket the object was listed from.
	bucket string
	// file is the object name as reported by the bucket listing.
	file string
	// count is how many times that name appeared in the listing.
	count int
}

// dir returns the part of the file name that precedes its last "/".
// A file name that contains no "/" yields the bucket name instead.
func (d duplicate) dir() string {
	lastSlash := strings.LastIndex(d.file, "/")
	if lastSlash < 0 {
		return d.bucket
	}
	return d.file[:lastSlash]
}
// ListDuplicateVersions creates a B2 client from the receiver's credentials,
// scans the account's buckets for object names that are listed more than once,
// and reports them.
//
// It returns nil when no duplicate is found. When duplicates exist, the
// returned error's message contains one "%+v"-formatted line per duplicate.
// Errors from creating the client or from the scan are wrapped and returned.
// cancel is forwarded to the scan unchanged.
func (b *BackBlaze) ListDuplicateVersions(ctx context.Context, cancel context.CancelFunc) error {
	client, err := b2.NewClient(ctx, b.bbID, b.bbKey)
	if err != nil {
		return fmt.Errorf("b2.NewClient %w", err)
	}
	log.Println("b2Client ok")

	found, err := b.listDuplicates(ctx, cancel, client)
	if err != nil {
		return fmt.Errorf("b.listDuplicates: %w", err)
	}

	// Nothing to report: the happy path ends here.
	if len(found) == 0 {
		return nil
	}

	var report strings.Builder
	for i := range found {
		report.WriteString(fmt.Sprintf("%+v\n", found[i]))
	}
	return fmt.Errorf("found duplicates: %s", report.String())
}
// listDuplicates scans every bucket returned by b2Client.ListBuckets and
// returns one duplicate entry for each object name that appears more than once
// in a bucket's listing (the listing is requested with b2.ListHidden()).
//
// Buckets are scanned concurrently, bounded by b.maxWorkers (at least one
// worker is always used). A failure while iterating a single bucket is logged
// through b.logger and that bucket contributes no results; the scan of the
// other buckets continues. An error is returned only when the bucket list
// cannot be fetched or when a worker slot cannot be acquired because ctx is
// done.
//
// cancel is accepted to keep the signature stable and is not used here.
func (b *BackBlaze) listDuplicates(ctx context.Context, cancel context.CancelFunc, b2Client *b2.Client) ([]duplicate, error) {
	buckets, err := b2Client.ListBuckets(ctx)
	if err != nil {
		return nil, fmt.Errorf("b2Client.ListBuckets %w", err)
	}
	log.Println("len(buckets)", len(buckets))

	// A semaphore with capacity 0 can never be acquired, which would block
	// the loop below until ctx ends; fall back to a single worker.
	workers := int64(b.maxWorkers)
	if workers < 1 {
		workers = 1
	}
	sm := semaphore.NewWeighted(workers)

	var (
		wg   sync.WaitGroup
		mu   sync.Mutex // guards dups, which every worker appends to
		dups = make([]duplicate, 0)
	)

	for _, bc := range buckets {
		if err := sm.Acquire(ctx, 1); err != nil {
			// Do not leave already-started workers running behind our back.
			wg.Wait()
			return nil, fmt.Errorf("sm.Acquire %w", err)
		}

		// Count the goroutine only once it is certain to start, so that
		// wg.Wait never waits for workers that were never launched.
		wg.Add(1)
		go func(bc *b2.Bucket) {
			defer sm.Release(1)
			defer wg.Done()

			bucketIter := bc.List(ctx, b2.ListHidden())
			if bucketIter == nil {
				b.logger.Error("bucket list cannot be nil")
				return
			}

			// Count how many times each object name shows up in the listing.
			files := make(map[string]int)
			for bucketIter.Next() {
				obj := bucketIter.Object()
				if obj == nil {
					b.logger.Error("bucketIter Object is nil")
					continue
				}
				files[obj.Name()]++
			}
			if bucketIter.Err() != nil {
				// NOTE(review): %w is an fmt.Errorf verb; whether this logger
				// interprets it depends on the logger type, which is not
				// visible here — confirm.
				b.logger.Error("bucketIter err %w", bucketIter.Err())
				return
			}

			// Collect this bucket's duplicates locally, then publish them in
			// one short critical section.
			var found []duplicate
			for file, count := range files {
				if count > 1 {
					found = append(found, duplicate{
						bucket: bc.Name(),
						file:   file,
						count:  count,
					})
				}
			}
			if len(found) == 0 {
				return
			}

			mu.Lock()
			dups = append(dups, found...)
			mu.Unlock()
		}(bc)
	}

	wg.Wait()
	return dups, nil
}
// listDuplicatesFromBucket looks up the bucket called bucketName and returns
// one duplicate entry for each object name that appears more than once in its
// listing (the listing is requested with b2.ListHidden()).
//
// Unlike a best-effort scan, every problem aborts the call: a failed bucket
// lookup, a nil iterator, a nil object, or an iterator error is returned to
// the caller. On success the returned slice is non-nil, possibly empty.
//
// cancel is accepted to keep the signature stable and is not used here.
func (b *BackBlaze) listDuplicatesFromBucket(ctx context.Context, cancel context.CancelFunc, b2Client *b2.Client, bucketName string) ([]duplicate, error) {
	bucket, err := b2Client.Bucket(ctx, bucketName)
	if err != nil {
		return nil, fmt.Errorf("b2Client.Bucket %w", err)
	}

	it := bucket.List(ctx, b2.ListHidden())
	if it == nil {
		return nil, errors.New("bucket list cannot be nil")
	}

	// Tally occurrences of every object name in the listing.
	seen := map[string]int{}
	for it.Next() {
		obj := it.Object()
		if obj == nil {
			return nil, errors.New("bucketIter Object is nil")
		}
		seen[obj.Name()]++
	}
	// Next returning false means either exhaustion or failure; tell them apart.
	if iterErr := it.Err(); iterErr != nil {
		return nil, fmt.Errorf("bucketIter err %w", iterErr)
	}

	// Keep only the names that were listed more than once.
	found := []duplicate{}
	for name, n := range seen {
		if n <= 1 {
			continue
		}
		found = append(found, duplicate{
			bucket: bucket.Name(),
			file:   name,
			count:  n,
		})
	}
	return found, nil
}