package backblaze

import (
	"context"
	"errors"
	"fmt"
	"log"
	"sort"
	"strings"
	"sync"

	"github.com/kurin/blazer/b2"
	"golang.org/x/sync/semaphore"
)

// duplicate describes one object name that showed up more than once in a
// bucket listing.
type duplicate struct {
	bucket string // name of the bucket the object was listed in
	file   string // object name as returned by the listing
	count  int    // number of times the name appeared in the listing
}

// dir returns the part of the object name that precedes its last "/".
// A name without any "/" has no directory component; the bucket name is
// returned in that case.
func (d duplicate) dir() string {
	idx := strings.LastIndex(d.file, "/")
	if idx < 0 {
		return d.bucket
	}
	return d.file[:idx]
}

// ListDuplicateVersions creates a B2 client from the receiver's credentials,
// scans every bucket for object names that are listed more than once, and
// reports them.
//
// It returns nil when nothing is duplicated. When duplicates exist they are
// returned as a single error whose message contains one "%+v"-formatted line
// per duplicate. Client-creation and listing failures are returned wrapped.
//
// cancel is only forwarded to listDuplicates; it is never invoked here.
func (b *BackBlaze) ListDuplicateVersions(ctx context.Context, cancel context.CancelFunc) error {
	b2Client, err := b2.NewClient(ctx, b.bbID, b.bbKey)
	if err != nil {
		return fmt.Errorf("b2.NewClient %w", err)
	}
	log.Println("b2Client ok")

	dups, err := b.listDuplicates(ctx, cancel, b2Client)
	if err != nil {
		return fmt.Errorf("b.listDuplicates: %w", err)
	}
	if len(dups) == 0 {
		return nil
	}

	var builder strings.Builder
	for _, dup := range dups {
		fmt.Fprintf(&builder, "%+v\n", dup)
	}
	return fmt.Errorf("found duplicates: %s", builder.String())
}

// listDuplicates lists all buckets visible to b2Client and scans them
// concurrently, at most b.maxWorkers at a time, collecting every object name
// that appears more than once in a bucket's listing.
//
// Buckets are listed with b2.ListHidden(), so a repeated name presumably
// corresponds to an object with several stored versions (NOTE(review):
// confirm against the blazer documentation).
//
// A failure while scanning an individual bucket is logged through b.logger
// and that bucket contributes nothing to the result; it does not fail the
// whole call. An error is returned only when the buckets cannot be listed or
// when a worker slot cannot be acquired (ctx done).
//
// The result is sorted by bucket name, then object name, so it does not
// depend on goroutine scheduling or map iteration order.
//
// cancel is accepted for interface compatibility and is not used.
func (b *BackBlaze) listDuplicates(ctx context.Context, cancel context.CancelFunc, b2Client *b2.Client) ([]duplicate, error) {
	buckets, err := b2Client.ListBuckets(ctx)
	if err != nil {
		return nil, fmt.Errorf("b2Client.ListBuckets %w", err)
	}
	log.Println("len(buckets)", len(buckets))

	// A semaphore of size < 1 can never grant a weight of 1: Acquire would
	// block until ctx is done (forever for a non-cancellable ctx). Fall back
	// to a single worker instead.
	workers := int64(b.maxWorkers)
	if workers < 1 {
		workers = 1
	}
	sm := semaphore.NewWeighted(workers)

	var (
		wg   sync.WaitGroup
		mu   sync.Mutex // guards dups, which every worker appends to
		dups = make([]duplicate, 0)
	)

	for _, bc := range buckets {
		if err := sm.Acquire(ctx, 1); err != nil {
			// Do not leave workers running behind the caller's back; with
			// ctx done their listings are expected to stop promptly.
			wg.Wait()
			return nil, fmt.Errorf("sm.Acquire %w", err)
		}

		// Count a worker only once it is really about to start, so that
		// the early return above can never leave the WaitGroup unbalanced.
		wg.Add(1)
		go func(bc *b2.Bucket) {
			defer sm.Release(1)
			defer wg.Done()

			files := make(map[string]int)
			bucketIter := bc.List(ctx, b2.ListHidden())
			if bucketIter == nil {
				b.logger.Error("bucket list cannot be nil")
				return
			}

			for bucketIter.Next() {
				obj := bucketIter.Object()
				if obj == nil {
					b.logger.Error("bucketIter Object is nil")
					continue
				}
				files[obj.Name()]++
			}
			if err := bucketIter.Err(); err != nil {
				// NOTE(review): "%w" is only meaningful to fmt.Errorf; the
				// logger's signature is not visible from this file, so the
				// call is left exactly as it was. Confirm and fix the
				// message where the logger type is known.
				b.logger.Error("bucketIter err %w", err)
				return
			}

			found := duplicatesFromCounts(bc.Name(), files)

			mu.Lock()
			dups = append(dups, found...)
			mu.Unlock()
		}(bc)
	}
	wg.Wait()

	sort.Slice(dups, func(i, j int) bool {
		if dups[i].bucket != dups[j].bucket {
			return dups[i].bucket < dups[j].bucket
		}
		return dups[i].file < dups[j].file
	})
	return dups, nil
}

// listDuplicatesFromBucket scans the single bucket named bucketName and
// returns every object name that appears more than once in its listing,
// sorted by object name.
//
// Unlike listDuplicates, any problem (bucket lookup, nil iterator, nil
// object, iteration error) aborts the scan and is returned as an error.
//
// cancel is accepted for interface compatibility and is not used.
func (b *BackBlaze) listDuplicatesFromBucket(ctx context.Context, cancel context.CancelFunc, b2Client *b2.Client, bucketName string) ([]duplicate, error) {
	bucket, err := b2Client.Bucket(ctx, bucketName)
	if err != nil {
		return nil, fmt.Errorf("b2Client.Bucket %w", err)
	}

	bucketIter := bucket.List(ctx, b2.ListHidden())
	if bucketIter == nil {
		return nil, errors.New("bucket list cannot be nil")
	}

	files := make(map[string]int)
	for bucketIter.Next() {
		obj := bucketIter.Object()
		if obj == nil {
			return nil, errors.New("bucketIter Object is nil")
		}
		files[obj.Name()]++
	}
	if err := bucketIter.Err(); err != nil {
		return nil, fmt.Errorf("bucketIter err %w", err)
	}

	return duplicatesFromCounts(bucket.Name(), files), nil
}

// duplicatesFromCounts returns one duplicate for every name in counts whose
// count is greater than one, tagged with bucketName and sorted by name. The
// returned slice is never nil.
func duplicatesFromCounts(bucketName string, counts map[string]int) []duplicate {
	dups := make([]duplicate, 0)
	for file, count := range counts {
		if count > 1 {
			dups = append(dups, duplicate{
				bucket: bucketName,
				file:   file,
				count:  count,
			})
		}
	}
	sort.Slice(dups, func(i, j int) bool { return dups[i].file < dups[j].file })
	return dups
}