【问题标题】:List nested folders in Google Cloud Storage Bucket in C#在 C# 中列出 Google Cloud Storage Bucket 中的嵌套文件夹
【发布时间】:2022-05-01 23:53:59
【问题描述】:

在 Google Cloud Storage 存储桶中列出嵌套文件夹的最佳方式是什么?我有一个存储桶,其中第一层是年文件夹,其下依次是月文件夹和日文件夹;每个日文件夹中有数百万个文件,而月文件夹中可能缺少某些日期(文件夹)。当我运行代码时,它会遍历所有文件夹和文件,这需要几个小时。这是我正在使用的代码:

 /// <summary>
 /// Lists the immediate "sub-folders" (common prefixes) found directly under
 /// <paramref name="folder"/> in <paramref name="bucket"/>.
 /// </summary>
 /// <remarks>
 /// The listing is issued with a delimiter, so the service collapses everything below the
 /// first delimiter after the prefix into a single prefix entry. Only folder names are
 /// transferred; the objects inside each folder are never enumerated. The returned sequence
 /// is lazy: requests are sent when it is iterated.
 /// </remarks>
 /// <param name="client">Storage client used to issue the listing.</param>
 /// <param name="bucket">Name of the bucket; must be non-empty.</param>
 /// <param name="folder">
 /// Folder prefix to look under. Use "" (or null) for the bucket root; any other value must
 /// end with the delimiter.
 /// </param>
 /// <returns>Full folder prefixes, each ending with the delimiter.</returns>
 /// <exception cref="ArgumentNullException"><paramref name="client"/> is null.</exception>
 /// <exception cref="ArgumentOutOfRangeException"><paramref name="bucket"/> is null or blank.</exception>
 /// <exception cref="ArgumentException"><paramref name="folder"/> is malformed.</exception>
 public static IEnumerable<string> ListFolders(this StorageClient client, string bucket, string folder = "")
        {
            var delimiter = Delimiter.ToString();

            if (client == null) { throw new ArgumentNullException(nameof(client)); }
            if (string.IsNullOrWhiteSpace(bucket)) { throw new ArgumentOutOfRangeException(nameof(bucket), "bucket must be non-empty"); }
            if (!string.IsNullOrEmpty(folder) && !folder.EndsWith(delimiter, StringComparison.Ordinal)) { throw new ArgumentException("folder must end in " + Delimiter, nameof(folder)); }
            if (folder == delimiter) { throw new ArgumentException("root folder is \"\", not " + Delimiter, nameof(folder)); }

            var prefix = folder ?? "";
            var options = new ListObjectsOptions { Delimiter = delimiter };

            return client
                .ListObjects(bucket, prefix, options)
                // Prefixes are only exposed on the raw page responses, not on the flattened object sequence.
                .AsRawResponses()
                // A page without any common prefix may report Prefixes as null.
                .SelectMany(page => page.Prefixes ?? Enumerable.Empty<string>())
                .Distinct();
        }


/// <summary>
/// Prints every month folder found under the given year folder and, beneath each one,
/// its day folders.
/// </summary>
/// <param name="yearFolder">Name of the year folder located under <c>settings.BucketFolder</c>.</param>
private static void ListLiveFolders(string yearFolder)
        {
            // StorageClient is disposable; release it once the listing is finished.
            using (var storage = StorageClient.Create(StorageHelper.Credentials))
            {
                try
                {
                    // The month listing is inside the try as well, so a failure here is reported
                    // the same way as a failure while listing the day folders.
                    var yearPrefix = $"{settings.BucketFolder}/{yearFolder}/";
                    var monthFolders = StorageHelper.ListFolders(storage, settings.Bucket, yearPrefix).ToList();

                    foreach (var monthFolder in monthFolders)
                    {
                        Console.WriteLine(monthFolder);

                        var dayFolders = StorageHelper.ListFolders(storage, settings.Bucket, monthFolder).ToList();
                        foreach (var dayFolder in dayFolders)
                        {
                            Console.WriteLine(dayFolder);
                        }
                    }
                }
                catch (Exception exception)
                {
                    Console.WriteLine(exception.Message);
                }
            }
        }

【问题讨论】:

  • 看看this question ,您似乎遇到了同样的问题,只是语言不同。查看已接受答案下的第一条评论,看看是否有帮助。 “如果我们通过带有 delimiter=/ 的 prefix=abc/xyz 得到答案,我们会得到名称以 abc/xyz 开头的所有对象以及在逻辑上可以视为子文件夹的前缀。”
  • 您能否检查以下documentation 是否对您有帮助?使用客户端库并列出对象可能会更快地得到结果。如果没有帮助,那么您可以尝试异步方式。
  • 不,它会搜索所有对象,然后你必须为每个对象提供条件,没有找到帮助。
  • 我猜你必须使用内部应用程序逻辑来过滤你的文件。如果您希望 Google 将其作为新功能提供,可以提交一个功能请求(FR):使用此issue tracker 链接即可。
  • 似乎您的代码在每次循环时都请求对象,由于对谷歌的http请求,导致它非常慢。如果你先得到所有东西然后在本地做逻辑呢? (对不起英语)

标签: c# google-cloud-storage google-api-dotnet-client


【解决方案1】:

要实现您的目标,您必须使用内部应用程序逻辑来过滤您的文件。但是,如果评论中链接的类似问题以及官方文档中的解决方案都没有帮助,您可以使用此 Issue Tracker 向 Google 提交功能请求。

【讨论】:

    【解决方案2】:

    这是我在 .NET 5 项目中使用的变通办法

    1. 先获取所有对象

      /// <summary>
      /// Fetches every object whose name starts with <paramref name="dirName"/> and converts
      /// the flat listing into a folder tree via <c>GCloudHelper.TreeGenerator</c>.
      /// </summary>
      /// <param name="dirName">Object-name prefix to list.</param>
      /// <returns>
      /// The tree produced by <c>GCloudHelper.TreeGenerator</c>; an empty list when nothing
      /// matches the prefix.
      /// </returns>
      public async Task<List<FolderTree>> GetObjectListAsync(string dirName)
      {
          var request = _storageService.Objects.List(_bucketName);
          request.Prefix = dirName;
          request.Projection = ProjectionEnum.Full;

          // A single call returns one page only; keep following NextPageToken until the
          // listing is exhausted, otherwise large folders are silently truncated.
          var objects = new List<Google.Apis.Storage.v1.Data.Object>();
          do
          {
              var page = await request.ExecuteAsync();
              if (page?.Items != null)
              {
                  objects.AddRange(page.Items);
              }
              request.PageToken = page?.NextPageToken;
          } while (!string.IsNullOrEmpty(request.PageToken));

          return GCloudHelper.TreeGenerator(objects);//call this method from GCloudHelper.cs
      }
      
    2. 在本地处理获取到的资源(GCloudHelper.cs)

    using Newtonsoft.Json;
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Web;
    using Object = Google.Apis.Storage.v1.Data.Object;
    namespace MyGCloudHelper
    {
         /// <summary>
         /// Helpers that turn a flat list of storage objects (whose names use '/' as a path
         /// separator) into a nested <see cref="FolderTree"/> structure with files and sizes.
         /// </summary>
         public class GCloudHelper
         {
            /// <summary>
            /// Builds a folder tree from <paramref name="objects"/>, then attaches files to each
            /// directory (GetFiles) and computes aggregated sizes (SizeSetter).
            /// </summary>
            /// <param name="objects">Objects to process; enumerated once into a list. A null value throws in ToList().</param>
            /// <param name="matchedPath">
            /// Path prefix to strip from object names. The default value " " (a single space) is
            /// used as the "no prefix" marker.
            /// </param>
            /// <returns>The generated tree; an empty list when <paramref name="objects"/> is empty.</returns>
            public static List<FolderTree> TreeGenerator(IEnumerable<Object> objects, string matchedPath = " ")
            {
                var __tree = new List<FolderTree>();
                var __items = objects.ToList();
                if (!__items.Any()) return new();
    
                var __matchedPath = matchedPath;
                // NOTE: in the default case this is a deferred LINQ query whose lambda captures the
                // local __matchedPath. The local is reassigned inside the loop below, so the filter
                // applied to items not yet enumerated changes while the loop runs: once a directory
                // path has been matched, later items whose name contains that path are skipped.
                // With the initial value " " the filter skips names that contain a space.
                var __toDoItems = matchedPath == " " ? __items.Where(x => !x.Name.Contains(__matchedPath)) : __items;
                foreach(var item in __toDoItems)
                {
                    // Strips the parameter value (not the reassigned local) followed by '/'.
                    var __newName = item.Name.Replace($"{matchedPath}/", "");
                    var __arr = __newName.Split('/');
                    for (var i = 0; i < __arr.Length; i++)
                    {
                        var __part = __arr[i];
                        // Path components from the first one up to and including index i.
                        var __pathArr = __arr[..(i+1)];
    
                        var __prefix = matchedPath == " " ? "" : $"{matchedPath}/";
                        var __currentPath = __prefix + string.Join('/', __pathArr);
    
                        // Components that do not look like a file (see IsFile) are treated as directories.
                        if (!IsFile(__part))
                        {
                            __tree.Add(new()
                            {
                                level = i,
                                name = __part,
                                path = __currentPath
                            });
                            // Other objects whose name contains this directory path (substring match).
                            var __children = __items.Where(cx => cx.Name != item.Name && cx.Name.Contains(__currentPath)).ToList();
                            if (__children.Any())
                            {
                                __matchedPath = __currentPath;
                                var __subs = GetSubs(__children, __currentPath);
                                // NOTE(review): __tree is indexed by the path depth i, not by the position
                                // of the node added just above; these can differ once __tree holds
                                // entries from more than one path. Confirm this is intended.
                                __tree[i].subs = __subs;
                            }
                        }
                    }
    
                }
                // Attach files to every directory, then compute sizes bottom-up.
                __tree = GetFiles(__tree, __items);
                return SizeSetter(__tree);
            }
    
            /// <summary>
            /// Sets <c>size</c> on every node to the sum of its own file sizes plus the sizes of
            /// its sub-directories, recursing into <c>subs</c> first.
            /// </summary>
            /// <param name="tree">Nodes to update in place.</param>
            /// <returns>The same list instance that was passed in.</returns>
            private static List<FolderTree> SizeSetter(List<FolderTree> tree)
            {
                tree.ForEach(item => {
                    // NOTE(review): FileProp.size is ulong?; this cast presumably throws if a size is
                    // null. Confirm sizes are always populated.
                    var __size = item.files.Sum(f => (decimal)f.size);
                    if (item.subs.Any()) item.subs = SizeSetter(item.subs);//recurse
                    item.size = (ulong)(__size + item.subs.Sum(sx => (decimal)sx.size));
                });
    
                return tree;
            }
    
            /// <summary>
            /// Adds to each directory in <paramref name="dirs"/> the files located directly inside
            /// it, then recurses into its sub-directories.
            /// </summary>
            /// <param name="dirs">Directories to fill; modified in place.</param>
            /// <param name="items">All objects of the listing.</param>
            /// <returns>The same list instance that was passed as <paramref name="dirs"/>.</returns>
            private static List<FolderTree> GetFiles(List<FolderTree> dirs, List<Object> items)
            {
                foreach(var dir in dirs)
                {
                    // Candidate objects are selected by substring match on the directory path.
                    var __getFiles = items.Where(x => x.Name.Contains(dir.path));
                    foreach (var f in __getFiles)
                    {
                        // Take what follows the last "<dir.path>/" and keep its first component;
                        // it is recorded only if it looks like a file name.
                        var __parts = f.Name.Split(dir.path + "/");
                        var __fpart = __parts.LastOrDefault()?.Split('/').FirstOrDefault();
                        if (IsFile(__fpart))
                        {
                            dir.files.Add(new()
                            {
                                name = __fpart,
                                link = HttpUtility.UrlDecode(f.MediaLink),
                                size = f.Size,
                                type = f.ContentType,
                                path = $"{dir.path}/{__fpart}",
                                created =  f.TimeCreated,
                                modified = f.Updated
                            });
                        }
                    }
    
                    if (dir.subs.Any()) 
                        dir.subs = GetFiles(dir.subs, items);//recurse
                }
    
                return dirs;
            }
    
            /// <summary>
            /// Builds the sub-directory nodes located directly under <paramref name="parantDir"/>,
            /// recursing for deeper levels. The <c>level</c> of the created nodes is not set here.
            /// </summary>
            /// <param name="children">Objects whose names contain <paramref name="parantDir"/>.</param>
            /// <param name="parantDir">Path of the parent directory (no trailing '/').</param>
            /// <returns>One node per distinct directory name found under the parent.</returns>
            private static List<FolderTree> GetSubs(List<Object> children, string parantDir)
            {
                var __tree = new List<FolderTree>();
                // JSON round-trip produces copies, so the rename below does not touch the
                // objects held in `children`.
                var __json = JsonConvert.SerializeObject(children);
                var __newList = JsonConvert.DeserializeObject<List<Object>>(__json);
                __newList.ForEach(x => x.Name = x.Name.Replace($"{parantDir}/", ""));
    
                var __tmpStore = new List<TmpStore>();
                for(var i = 0; i < __newList.Count(); i++)
                {
                    // First component of the relative name; kept when it does not look like a file.
                    var __name = __newList[i].Name.Split('/').FirstOrDefault();
                    if (!IsFile(__name))
                        __tmpStore.Add(new() { Name = __name, item = children.FirstOrDefault(fx => fx.Id == __newList[i].Id) });
                }
    
                // One node per distinct directory name.
                var __group = __tmpStore.GroupBy(x => x.Name);
                foreach (var group in __group)
                {
                    var __tmp = group.FirstOrDefault();
                    var __currentPath = $"{parantDir}/{__tmp.Name}";
    
                    // Other objects whose name contains this directory path (substring match).
                    var __children = children.Where(cx => cx.Name != __tmp.item.Name && cx.Name.Contains(__currentPath)).ToList();
                    var __subs = new List<FolderTree>();
                    if (__children.Any())
                        __subs = GetSubs(__children, __currentPath);//recurse
    
                    __tree.Add(new()
                    {
                        name = __tmp.Name,
                        path = __currentPath,
                        subs = __subs,
                    });
                }
    
                return __tree;
            }
    
    
            /// <summary>
            /// Heuristic file test: a path component counts as a file when it contains at least
            /// one '.' character. A null <paramref name="part"/> throws.
            /// </summary>
            private static bool IsFile(string part)
                => part.Split('.').Count() > 1;
    
        }//class
    
        /// <summary>
        /// Temporary pairing of a directory name with a storage object; used by
        /// GCloudHelper.GetSubs while grouping entries by directory name.
        /// </summary>
        internal class TmpStore
        {
            // First path component of the object's name relative to the parent directory.
            public string Name { get; set; }
            // Object from the original `children` list that produced this entry.
            public Object item { get; set; }
        }
    
        /// <summary>
        /// A directory node of the generated tree: its name, path, aggregated size,
        /// sub-directories and the files it directly contains.
        /// </summary>
        public class FolderTree
        {
            // Directory name (a single path component).
            public string name { get; set; }
            // Index of the path component, set by GCloudHelper.TreeGenerator; nodes created by
            // GCloudHelper.GetSubs leave it at the default 0.
            public int level { get; set; }
            // Path of the directory, components joined with '/'.
            public string path { get; set; }
            // Sum of the sizes of own files and of all sub-directories (set by GCloudHelper.SizeSetter).
            public ulong size { get; set; }
            // Sub-directories; initialized to an empty list.
            public List<FolderTree> subs { get; set; } = new();
            // Files located in this directory; initialized to an empty list.
            public List<FileProp> files { get; set; } = new();
        }
    
        /// <summary>
        /// Describes one file of a <see cref="FolderTree"/> node; populated by
        /// GCloudHelper.GetFiles from a storage object.
        /// </summary>
        public class FileProp
        {
            // File name (last path component).
            public string name { get; set; }
            // URL-decoded MediaLink of the storage object.
            public string link { get; set; }
            // Size copied from the storage object's Size; nullable.
            public ulong? size { get; set; }
            // ContentType of the storage object.
            public string type { get; set; }
            // "<directory path>/<file name>".
            public string path { get; set; }
            // TimeCreated of the storage object.
            public DateTime? created { get; set; }
            // Updated timestamp of the storage object.
            public DateTime? modified { get; set; }
        }
    
    }
    
    

    结果
    这将生成包含文件和大小的嵌套目录树
    其中subs 是子目录列表,files 是文件列表
    => 结果可能不像你的概念,但它可能会有所帮助

    [
        {
            "name": "myFolder",
            "level": 0,
            "path": "myFolder",
            "size": 304340,
            "subs": [
                {
                    "name": "Resource",
                    "level": 0,
                    "path": "myFolder/Resource",
                    "size": 304301,
                    "subs": [
                        {
                            "name": "Image",
                            "level": 0,
                            "path": "myFolder/Resource/Image",
                            "size": 304301,
                            "subs": [
                                {
                                    "name": "Logo",
                                    "level": 0,
                                    "path": "myFolder/Resource/Image/Logo",
                                    "size": 304301,
                                    "subs": [],
                                    "files": [
                                        {
                                            "name": "fileImg01.png",
                                            "link": "https://storage.googleapis.com/download/storage/v1/b/myBucket/o/myFolder/Resource/Image/Logo/fileImg01.png",
                                            "size": 64436,
                                            "type": "image/jpeg",
                                            "path": "myFolder/Resource/Image/Logo/fileImg01.png",
                                            "created": "2022-05-01T11:13:27.727+07:00",
                                            "modified": "2022-05-01T11:13:27.727+07:00"
                                        },
                                        {
                                            "name": "fileImg02.png",
                                            "link": "https://storage.googleapis.com/download/storage/v1/b/myBucket/o/myFolder/Resource/Image/Logo/fileImg02.png",
                                            "size": 175429,
                                            "type": "image/jpeg",
                                            "path": "myFolder/Resource/Image/Logo/fileImg02.png",
                                            "created": "2022-05-01T11:06:35.58+07:00",
                                            "modified": "2022-05-01T11:06:35.58+07:00"
                                        },
                                        {
                                            "name": "fileImg03.png",
                                            "link": "https://storage.googleapis.com/download/storage/v1/b/myBucket/o/myFolder/Resource/Image/Logo/fileImg03.png",
                                            "size": 64436,
                                            "type": "image/jpeg",
                                            "path": "myFolder/Resource/Image/Logo/fileImg03.png",
                                            "created": "2022-05-01T11:18:42.365+07:00",
                                            "modified": "2022-05-01T11:18:42.365+07:00"
                                        }
                                    ]
                                }
                            ],
                            "files": []
                        }
                    ],
                    "files": []
                }
            ],
            "files": [
                {
                    "name": "README.MD",
                    "link": "https://storage.googleapis.com/download/storage/v1/b/myBucket/o/myFolder/README.MD",
                    "size": 39,
                    "type": null,
                    "path": "myFolder/README.MD",
                    "created": "2022-05-01T11:04:57.565+07:00",
                    "modified": "2022-05-01T11:04:57.565+07:00"
                }
            ]
        }
    ]
    

    注意使用C# .net5

    【讨论】:

      猜你喜欢
      • 2017-11-08
      • 1970-01-01
      • 2019-06-29
      • 1970-01-01
      • 2021-07-15
      • 1970-01-01
      • 2014-11-14
      • 2020-02-29
      • 1970-01-01
      相关资源
      最近更新 更多