io icon indicating copy to clipboard operation
io copied to clipboard

Fix S3 GetChildren() substr index error

Open annie-anna opened this issue 6 months ago • 2 comments

Found a bug in S3 GetChildren, which will give substr index error.

int GetChildren(const TF_Filesystem* filesystem, const char* path,
                char*** entries, TF_Status* status) {
  TF_VLog(1, "GetChildren for path: %s\n", path);
  Aws::String bucket, prefix;
  ParseS3Path(path, true, &bucket, &prefix, status);
  if (TF_GetCode(status) != TF_OK) return -1;
  if (!prefix.empty() && prefix.back() != '/') prefix.push_back('/');    // for example, prefix='dir1/'

  auto s3_file = static_cast<S3File*>(filesystem->plugin_filesystem);
  GetS3Client(s3_file);

  Aws::S3::Model::ListObjectsV2Request list_objects_request;
  list_objects_request.WithBucket(bucket)
      .WithPrefix(prefix)
      .WithMaxKeys(kS3GetChildrenMaxKeys)
      .WithDelimiter("/");
  list_objects_request.SetResponseStreamFactory(
      []() { return Aws::New<Aws::StringStream>(kS3FileSystemAllocationTag); });

  Aws::S3::Model::ListObjectsV2Result list_objects_result;
  std::vector<Aws::String> result;
  do {
    auto list_objects_outcome =
        s3_file->s3_client->ListObjectsV2(list_objects_request);
    if (!list_objects_outcome.IsSuccess()) {
      TF_SetStatusFromAWSError(list_objects_outcome.GetError(), status);
      return -1;
    }

    list_objects_result = list_objects_outcome.GetResult();
    for (const auto& object : list_objects_result.GetCommonPrefixes()) {  // 'CommonPrefixes': [{'Prefix': 'dir1/'}]
      Aws::String s = object.GetPrefix();                                 // object={'Prefix': 'dir1/'}, s='dir1/'
      s.erase(s.length() - 1);                                            // s='dir1'
      Aws::String entry = s.substr(prefix.length());                      // s='dir1', prefix='dir1/', prefix.length() > s.length()
      if (entry.length() > 0) {
        result.push_back(entry);
      }
    }
    for (const auto& object : list_objects_result.GetContents()) {
      Aws::String s = object.GetKey();
      Aws::String entry = s.substr(prefix.length());
      if (entry.length() > 0) {
        result.push_back(entry);
      }
    }
    list_objects_request.SetContinuationToken(
        list_objects_result.GetNextContinuationToken());
  } while (list_objects_result.GetIsTruncated());

  int num_entries = result.size();
  *entries = static_cast<char**>(
      plugin_memory_allocate(num_entries * sizeof((*entries)[0])));
  for (int i = 0; i < num_entries; i++)
    (*entries)[i] = strdup(result[i].c_str());
  TF_SetStatus(status, TF_OK, "");
  return num_entries;
}

annie-anna avatar May 10 '25 02:05 annie-anna

Thanks for your pull request! It looks like this may be your first contribution to a Google open source project. Before we can look at your pull request, you'll need to sign a Contributor License Agreement (CLA).

View this failed invocation of the CLA check for more information.

For the most up to date status, view the checks section at the bottom of the pull request.

google-cla[bot] avatar May 10 '25 02:05 google-cla[bot]

@mihaimaruseac @yongtang Could you review the PR?

annie-anna avatar May 14 '25 01:05 annie-anna