2023-08-27 21:30:19 +02:00
|
|
|
package backblaze
|
2023-08-12 09:52:04 +02:00
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"log"
|
|
|
|
"strings"
|
|
|
|
"sync"
|
|
|
|
|
|
|
|
"github.com/kurin/blazer/b2"
|
|
|
|
"golang.org/x/sync/semaphore"
|
|
|
|
)
|
|
|
|
|
2023-08-27 21:30:19 +02:00
|
|
|
// duplicate records one object name that was seen more than once while
// listing a bucket.
type duplicate struct {
	// bucket is the name of the bucket the listing came from; file is the
	// object name that appeared more than once in it.
	bucket, file string
	// count is how many times file appeared in the listing.
	count int
}
|
|
|
|
|
2023-08-12 09:52:04 +02:00
|
|
|
func (d duplicate) dir() string {
|
|
|
|
if !strings.Contains(d.file, "/") {
|
|
|
|
return d.bucket
|
|
|
|
}
|
|
|
|
splitted := strings.Split(d.file, "/")
|
|
|
|
return strings.Join(splitted[:(len(splitted)-1)], "/")
|
|
|
|
}
|
|
|
|
|
2023-08-27 21:30:19 +02:00
|
|
|
func (b *BackBlaze) ListDuplicateVersions(ctx context.Context, cancel context.CancelFunc) error {
|
2023-08-12 09:52:04 +02:00
|
|
|
b2Client, err := b2.NewClient(ctx, b.bbID, b.bbKey)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("b2.NewClient %w", err)
|
|
|
|
}
|
|
|
|
log.Println("b2Client ok")
|
|
|
|
|
|
|
|
dups, err := b.listDuplicates(ctx, cancel, b2Client)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("b.listDuplicates: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(dups) > 0 {
|
|
|
|
var builder strings.Builder
|
|
|
|
for _, dup := range dups {
|
|
|
|
builder.WriteString(fmt.Sprintf("%+v\n", dup))
|
|
|
|
}
|
|
|
|
return fmt.Errorf("found duplicates: %s", builder.String())
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-08-27 21:30:19 +02:00
|
|
|
func (b *BackBlaze) listDuplicates(ctx context.Context, cancel context.CancelFunc, b2Client *b2.Client) ([]duplicate, error) {
|
2023-08-12 09:52:04 +02:00
|
|
|
buckets, err := b2Client.ListBuckets(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("b2Client.Bucket %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
wg := sync.WaitGroup{}
|
|
|
|
dups := make([]duplicate, 0)
|
|
|
|
|
|
|
|
log.Println("len(buckets)", len(buckets))
|
|
|
|
sm := semaphore.NewWeighted(int64(b.maxWorkers))
|
|
|
|
wg.Add(len(buckets))
|
|
|
|
for _, bc := range buckets {
|
|
|
|
if err := sm.Acquire(ctx, 1); err != nil {
|
|
|
|
return nil, fmt.Errorf("sm.Acquire %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
go func(bc *b2.Bucket) {
|
|
|
|
defer sm.Release(1)
|
|
|
|
defer wg.Done()
|
|
|
|
files := make(map[string]int, 0)
|
|
|
|
|
|
|
|
bucketIter := bc.List(ctx, b2.ListHidden())
|
|
|
|
if bucketIter == nil {
|
2023-08-29 11:40:07 +02:00
|
|
|
b.logger.Errorln("bucket list cannot be nil")
|
2023-08-12 09:52:04 +02:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
for {
|
|
|
|
if !bucketIter.Next() {
|
|
|
|
if bucketIter.Err() != nil {
|
2023-08-29 11:40:07 +02:00
|
|
|
b.logger.Errorf("bucketIter err %s", bucketIter.Err())
|
2023-08-12 09:52:04 +02:00
|
|
|
return
|
|
|
|
}
|
|
|
|
break
|
|
|
|
}
|
|
|
|
if bucketIter.Object() == nil {
|
2023-08-29 11:40:07 +02:00
|
|
|
b.logger.Errorln("bucketIter Object is nil")
|
2023-08-12 09:52:04 +02:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
files[bucketIter.Object().Name()]++
|
|
|
|
}
|
|
|
|
|
|
|
|
// Search duplicates
|
|
|
|
for file, count := range files {
|
|
|
|
if count > 1 {
|
|
|
|
dups = append(dups, duplicate{
|
|
|
|
bucket: bc.Name(),
|
|
|
|
file: file,
|
|
|
|
count: count,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}(bc)
|
|
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
|
|
|
|
return dups, nil
|
|
|
|
}
|
|
|
|
|
2023-08-27 21:30:19 +02:00
|
|
|
func (b *BackBlaze) listDuplicatesFromBucket(ctx context.Context, cancel context.CancelFunc, b2Client *b2.Client, bucketName string) ([]duplicate, error) {
|
2023-08-12 09:52:04 +02:00
|
|
|
bucket, err := b2Client.Bucket(ctx, bucketName)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("b2Client.Bucket %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
dups := make([]duplicate, 0)
|
|
|
|
files := make(map[string]int, 0)
|
|
|
|
|
|
|
|
bucketIter := bucket.List(ctx, b2.ListHidden())
|
|
|
|
if bucketIter == nil {
|
|
|
|
return nil, errors.New("bucket list cannot be nil")
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
for {
|
|
|
|
if !bucketIter.Next() {
|
|
|
|
if bucketIter.Err() != nil {
|
|
|
|
return nil, fmt.Errorf("bucketIter err %w", bucketIter.Err())
|
|
|
|
}
|
|
|
|
break
|
|
|
|
}
|
|
|
|
if bucketIter.Object() == nil {
|
|
|
|
return nil, errors.New("bucketIter Object is nil")
|
|
|
|
}
|
|
|
|
files[bucketIter.Object().Name()]++
|
|
|
|
}
|
|
|
|
|
|
|
|
// Search duplicates
|
|
|
|
for file, count := range files {
|
|
|
|
if count > 1 {
|
|
|
|
dups = append(dups, duplicate{
|
|
|
|
bucket: bucket.Name(),
|
|
|
|
file: file,
|
|
|
|
count: count,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return dups, nil
|
|
|
|
}
|