gct icon indicating copy to clipboard operation
gct copied to clipboard

Lack of IO error checks generate incorrect file checksums

Open spbooth opened this issue 1 year ago • 4 comments

The routine globus_l_gass_copy_cksm_file() in globus_gass_copy_glob.c has a file read loop while((n = read(fd,buf,count)) > 0) If an IO error occurs in the read call it will return -1 and the loop will terminate early generating an incorrect digest for the file but no error report until the subsequent file transfer fails its checksum test.

On a related note if you check for errors properly you discover that a recursive globus-url-copy attempts to calculate checksums on directory-urls using this routine (which always generates a error in the read call)

My POC fix is

static
globus_result_t
globus_l_gass_copy_cksm_file(
    globus_gass_copy_handle_t *         handle,
    char *                              url,
    char *                              cksm,
    globus_off_t                        offset,
    globus_off_t                        length,
    const char *                        algorithm,
    globus_gass_copy_callback_t         callback,
    void *                              callback_arg)
{
    char *      myname = "globus_l_gass_copy_cksm_file";

    globus_url_t                        parsed_url;
    globus_result_t                     result;
    int                                 rc;

    EVP_MD_CTX *                        mdctx;
    const EVP_MD *                      md;
    char *                              cksmptr;
    unsigned char                       md_value[EVP_MAX_MD_SIZE];
    char                                cksm_buff[CKSM_SIZE];
    unsigned int                        md_len;

    char                                buf[GASS_COPY_CKSM_BUFSIZE];

    int                                 i;
    int                                 fd;
    int                                 n;
    ssize_t                             readlen;
    globus_off_t                        count;
    globus_off_t                        read_left;
    struct stat                         statbuf;
    int                                 is_regular;

    rc = globus_url_parse_loose(url, &parsed_url);
    if(rc != 0)
    {
        result = globus_error_put(
            globus_error_construct_string(
                GLOBUS_GASS_COPY_MODULE,
                GLOBUS_NULL,
                "[%s]: error parsing url: "
                "globus_url_parse returned %d",
                myname,
                url));
        goto error_url;
    }

    if(parsed_url.url_path == GLOBUS_NULL)
    {
        result = globus_error_put(
            globus_error_construct_string(
                GLOBUS_GASS_COPY_MODULE,
                GLOBUS_NULL,
                "[%s]: error parsing url: "
                "url has no path",
                myname));
        goto error_fd;
    }    
       
    read_left = length;
    if(length >= 0)
    {
        count = (read_left > GASS_COPY_CKSM_BUFSIZE) ? 
            GASS_COPY_CKSM_BUFSIZE : read_left;
    }
    else
    {
        count = GASS_COPY_CKSM_BUFSIZE;
    }
    
    fd = open(parsed_url.url_path, O_RDONLY);        
    if(fd < 0)
    {
        result = globus_error_put(
            globus_error_construct_string(
                GLOBUS_GASS_COPY_MODULE,
                GLOBUS_NULL,
                "[%s]: error opening checksum file %s",
                myname,
                parsed_url.url_path));
        goto error_fd;
    }
    fstat(fd,&statbuf);
    is_regular = S_ISREG(statbuf.st_mode);    

    if (is_regular && lseek(fd, offset, SEEK_SET) == -1)
    {
        result = globus_error_put(
            globus_error_construct_string(
                GLOBUS_GASS_COPY_MODULE,
                GLOBUS_NULL,
                "[%s]: error (%d) seeking checksum file %s",
                myname,
                errno,
                parsed_url.url_path));
        goto error_seek;
    }

    OpenSSL_add_all_algorithms();
    md = EVP_get_digestbyname(algorithm);
    if (!md)
    {
        /*
         * try again with uppercase algorithm name.
         */
        char *                          p;
        char *                          alg = strdup(algorithm);
        for(p = alg; *p != '\0'; p++)
        {
            *p = toupper(*p);
        }
        md = EVP_get_digestbyname(alg);
        free(alg);
    }
    if (!md)
    {
        result = globus_error_put(globus_error_construct_string(
                GLOBUS_GASS_COPY_MODULE,
                GLOBUS_NULL, 
                "Unable to use checksum algorithm %s", 
                algorithm));
        goto error_seek;
    }

    mdctx = EVP_MD_CTX_create();
    EVP_DigestInit_ex(mdctx, md, NULL);

    if( is_regular )
    {
        while((n = (readlen = read(fd, buf, count))) > 0)
        {
            if(length >= 0)
            {
                read_left -= n;
                count = (read_left > GASS_COPY_CKSM_BUFSIZE) ? GASS_COPY_CKSM_BUFSIZE : read_left;
            }

            EVP_DigestUpdate(mdctx, buf, n);
        }
        if( readlen < 0 && count > 0 ){
            result = globus_error_put(globus_error_construct_string(
                    GLOBUS_GASS_COPY_MODULE,
                    GLOBUS_NULL,
                    "Read error in checksum calculation url=%s read_left =%d %d %s",
                    url,read_left,readlen, strerror(errno)));
            goto error_seek;
        }
    }

    EVP_DigestFinal_ex(mdctx, md_value, &md_len);
    EVP_MD_CTX_destroy(mdctx);
    
    close(fd);
        
    cksmptr = cksm_buff;
    for (i = 0; i < md_len; i++)
    {
       sprintf(cksmptr, "%02x", md_value[i]);
       cksmptr++;
       cksmptr++;
    }
    *cksmptr = '\0';
    
    strncpy(cksm, cksm_buff, sizeof(cksm_buff));
    //globus_libc_fprintf(stderr,"SPB file-checksum %s=%s\n",url,cksm); 
    globus_url_destroy(&parsed_url);
    
    if(callback)
    {
        callback(callback_arg, handle, NULL);
    }
    return GLOBUS_SUCCESS;

error_seek:
    close(fd);
error_fd:
    globus_url_destroy(&parsed_url);

error_url:

    return result;
}

But it probably needs a bit of tidying up.

spbooth avatar Jul 27 '23 16:07 spbooth