s5cmd icon indicating copy to clipboard operation
s5cmd copied to clipboard

Select document JSON content type (multiple lines)

Open ollie-bell opened this issue 1 year ago • 0 comments

S3 Select has a "JSON content type" option that can be set to either "Lines" or "Document". Currently, it appears that only "Lines" is implemented in s5cmd. Would it be possible to also implement "Document"?

image

For example, for this JSON file (also pasted at the end of this issue), the query

SELECT s.tracking_id FROM s3object[*]['metadata']['.zattrs'] s

should return

{
  "tracking_id": "hdl:21.14100/e46c841e-d14c-479f-a6c1-85439a10baad"
}

image

However, running

s5cmd select --query "SELECT s.tracking_id FROM s3object[*]['metadata']['.zattrs'] s" s3://cmip6-pds/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp370/r2i1p1f1/day/hurs/gn/v20190429/.zmetadata

currently returns an error:

ERROR "select --query=SELECT s.tracking_id FROM s3object[*]['metadata']['.zattrs'] s s3://cmip6-pds/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp370/r2i1p1f1/day/hurs/gn/v20190429/.zmetadata": JSONParsingError: Error parsing JSON file. Please check the file and try again. status code: 400, request id: 5791JR5VJYKXJ6KM, host id: G/NFOIjrL4RrmdJkpbCNzw2U2v4rGBfpApFyfZec/h+2iJBCyGKySit05YT3fD7/5Ib+ZwfYoT0=
s3://cmip6-pds/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp370/r2i1p1f1/day/hurs/gn/v20190429/.zmetadata
{
  "metadata": {
    ".zattrs": {
      "CCCma_model_hash": "1f91f92cb6d607391f44831504025d32fc44faa1",
      "CCCma_parent_runid": "rc3.1-his02",
      "CCCma_pycmor_hash": "33c30511acc319a98240633965a04ca99c26427e",
      "CCCma_runid": "rc3.1-s7002",
      "Conventions": "CF-1.7 CMIP-6.2",
      "YMDH_branch_time_in_child": "2015:01:01:00",
      "YMDH_branch_time_in_parent": "2015:01:01:00",
      "activity_id": "ScenarioMIP AerChemMIP",
      "branch_method": "Spin-up documentation",
      "branch_time_in_child": 60225,
      "branch_time_in_parent": 60225,
      "cmor_version": "3.4.0",
      "contact": "[email protected]",
      "coordinates": "time_bnds lat_bnds lon_bnds",
      "creation_date": "2019-05-02T08:47:58Z",
      "data_specs_version": "01.00.29",
      "experiment": "gap-filling scenario reaching 7.0 based on SSP3",
      "experiment_id": "ssp370",
      "external_variables": "areacella",
      "forcing_index": 1,
      "frequency": "day",
      "further_info_url": "https://furtherinfo.es-doc.org/CMIP6.CCCma.CanESM5.ssp370.none.r2i1p1f1",
      "grid": "T63L49 native atmosphere, T63 Linear Gaussian Grid; 128 x 64 longitude/latitude; 49 levels; top level 1 hPa",
      "grid_label": "gn",
      "history": "2019-05-02T08:47:58Z ;rewrote data to be consistent with ScenarioMIP for variable hurs found in table day.;\nOutput from $runid",
      "initialization_index": 1,
      "institution": "Canadian Centre for Climate Modelling and Analysis, Environment and Climate Change Canada, Victoria, BC V8P 5C2, Canada",
      "institution_id": "CCCma",
      "license": "CMIP6 model data produced by The Government of Canada (Canadian Centre for Climate Modelling and Analysis, Environment and Climate Change Canada) is licensed under a Creative Commons Attribution ShareAlike 4.0 International License (https://creativecommons.org/licenses). Consult https://pcmdi.llnl.gov/CMIP6/TermsOfUse for terms of use governing CMIP6 output, including citation requirements and proper acknowledgment. Further information about this data, including some limitations, can be found via the further_info_url (recorded as a global attribute in this file) and at https:///pcmdi.llnl.gov/. The data producers and data providers make no warranty, either express or implied, including, but not limited to, warranties of merchantability and fitness for a particular purpose. All liabilities arising from the supply of the information (including any liability arising in negligence) are excluded to the fullest extent permitted by law.",
      "mip_era": "CMIP6",
      "nominal_resolution": "500 km",
      "parent_activity_id": "CMIP",
      "parent_experiment_id": "historical",
      "parent_mip_era": "CMIP6",
      "parent_source_id": "CanESM5",
      "parent_time_units": "days since 1850-01-01 0:0:0.0",
      "parent_variant_label": "r2i1p1f1",
      "physics_index": 1,
      "product": "model-output",
      "realization_index": 2,
      "realm": "atmos",
      "references": "Geophysical Model Development Special issue on CanESM5 (https://www.geosci-model-dev.net/special_issues.html)",
      "source": "CanESM5 (2019): \naerosol: interactive\natmos: CanAM5 (T63L49 native atmosphere, T63 Linear Gaussian Grid; 128 x 64 longitude/latitude; 49 levels; top level 1 hPa)\natmosChem: specified oxidants for aerosols\nland: CLASS3.6/CTEM1.2\nlandIce: specified ice sheets\nocean: NEMO3.4.1 (ORCA1 tripolar grid, 1 deg with refinement to 1/3 deg within 20 degrees of the equator; 361 x 290 longitude/latitude; 45 vertical levels; top grid cell 0-6.19 m)\nocnBgchem: Canadian Model of Ocean Carbon (CMOC); NPZD ecosystem with OMIP prescribed carbonate chemistry\nseaIce: LIM2",
      "source_id": "CanESM5",
      "source_type": "AOGCM",
      "sub_experiment": "none",
      "sub_experiment_id": "none",
      "table_id": "day",
      "table_info": "Creation Date:(20 February 2019) MD5:374fbe5a2bcca535c40f7f23da271e49",
      "title": "CanESM5 output prepared for CMIP6",
      "tracking_id": "hdl:21.14100/e46c841e-d14c-479f-a6c1-85439a10baad",
      "variable_id": "hurs",
      "variant_label": "r2i1p1f1",
      "version": "v20190429",
      "status": "2019-11-15;created;by [email protected]",
      "netcdf_tracking_ids": "hdl:21.14100/e46c841e-d14c-479f-a6c1-85439a10baad",
      "version_id": "v20190429"
    },
    ".zgroup": {
      "zarr_format": 2
    },
    "height/.zarray": {
      "chunks": [],
      "compressor": null,
      "dtype": "<f8",
      "fill_value": "NaN",
      "filters": null,
      "order": "C",
      "shape": [],
      "zarr_format": 2
    },
    "height/.zattrs": {
      "_ARRAY_DIMENSIONS": [],
      "axis": "Z",
      "long_name": "height",
      "positive": "up",
      "standard_name": "height",
      "units": "m"
    },
    "hurs/.zarray": {
      "chunks": [
        1807,
        64,
        128
      ],
      "compressor": {
        "blocksize": 0,
        "clevel": 5,
        "cname": "lz4",
        "id": "blosc",
        "shuffle": 1
      },
      "dtype": "<f4",
      "fill_value": 100000002004087730000,
      "filters": null,
      "order": "C",
      "shape": [
        31390,
        64,
        128
      ],
      "zarr_format": 2
    },
    "hurs/.zattrs": {
      "_ARRAY_DIMENSIONS": [
        "time",
        "lat",
        "lon"
      ],
      "cell_measures": "area: areacella",
      "cell_methods": "area: time: mean",
      "comment": "The relative humidity with respect to liquid water for T> 0 C, and with respect to ice for T<0 C.",
      "coordinates": "height",
      "history": "mltby100 2019-05-02T08:47:58Z altered by CMOR: Treated scalar dimension: 'height'. 2019-05-02T08:47:58Z altered by CMOR: Reordered dimensions, original order: lat lon time. 2019-05-02T08:47:58Z altered by CMOR: replaced missing value flag (1e+38) with standard missing value (1e+20).",
      "long_name": "Near-Surface Relative Humidity",
      "original_name": "SRH",
      "standard_name": "relative_humidity",
      "units": "%"
    },
    "lat/.zarray": {
      "chunks": [
        64
      ],
      "compressor": {
        "blocksize": 0,
        "clevel": 5,
        "cname": "lz4",
        "id": "blosc",
        "shuffle": 1
      },
      "dtype": "<f8",
      "fill_value": "NaN",
      "filters": null,
      "order": "C",
      "shape": [
        64
      ],
      "zarr_format": 2
    },
    "lat/.zattrs": {
      "_ARRAY_DIMENSIONS": [
        "lat"
      ],
      "axis": "Y",
      "bounds": "lat_bnds",
      "long_name": "Latitude",
      "standard_name": "latitude",
      "units": "degrees_north"
    },
    "lat_bnds/.zarray": {
      "chunks": [
        64,
        2
      ],
      "compressor": {
        "blocksize": 0,
        "clevel": 5,
        "cname": "lz4",
        "id": "blosc",
        "shuffle": 1
      },
      "dtype": "<f8",
      "fill_value": "NaN",
      "filters": null,
      "order": "C",
      "shape": [
        64,
        2
      ],
      "zarr_format": 2
    },
    "lat_bnds/.zattrs": {
      "_ARRAY_DIMENSIONS": [
        "lat",
        "bnds"
      ]
    },
    "lon/.zarray": {
      "chunks": [
        128
      ],
      "compressor": {
        "blocksize": 0,
        "clevel": 5,
        "cname": "lz4",
        "id": "blosc",
        "shuffle": 1
      },
      "dtype": "<f8",
      "fill_value": "NaN",
      "filters": null,
      "order": "C",
      "shape": [
        128
      ],
      "zarr_format": 2
    },
    "lon/.zattrs": {
      "_ARRAY_DIMENSIONS": [
        "lon"
      ],
      "axis": "X",
      "bounds": "lon_bnds",
      "long_name": "Longitude",
      "standard_name": "longitude",
      "units": "degrees_east"
    },
    "lon_bnds/.zarray": {
      "chunks": [
        128,
        2
      ],
      "compressor": {
        "blocksize": 0,
        "clevel": 5,
        "cname": "lz4",
        "id": "blosc",
        "shuffle": 1
      },
      "dtype": "<f8",
      "fill_value": "NaN",
      "filters": null,
      "order": "C",
      "shape": [
        128,
        2
      ],
      "zarr_format": 2
    },
    "lon_bnds/.zattrs": {
      "_ARRAY_DIMENSIONS": [
        "lon",
        "bnds"
      ]
    },
    "time/.zarray": {
      "chunks": [
        31390
      ],
      "compressor": {
        "blocksize": 0,
        "clevel": 5,
        "cname": "lz4",
        "id": "blosc",
        "shuffle": 1
      },
      "dtype": "<i8",
      "fill_value": null,
      "filters": null,
      "order": "C",
      "shape": [
        31390
      ],
      "zarr_format": 2
    },
    "time/.zattrs": {
      "_ARRAY_DIMENSIONS": [
        "time"
      ],
      "axis": "T",
      "bounds": "time_bnds",
      "calendar": "noleap",
      "long_name": "time",
      "standard_name": "time",
      "units": "days since 2015-01-01 12:00:00.000000"
    },
    "time_bnds/.zarray": {
      "chunks": [
        15695,
        2
      ],
      "compressor": {
        "blocksize": 0,
        "clevel": 5,
        "cname": "lz4",
        "id": "blosc",
        "shuffle": 1
      },
      "dtype": "<f8",
      "fill_value": "NaN",
      "filters": null,
      "order": "C",
      "shape": [
        31390,
        2
      ],
      "zarr_format": 2
    },
    "time_bnds/.zattrs": {
      "_ARRAY_DIMENSIONS": [
        "time",
        "bnds"
      ],
      "calendar": "365_day",
      "units": "days since 1850-01-01"
    }
  },
  "zarr_consolidated_format": 1
}

ollie-bell avatar Aug 24 '22 04:08 ollie-bell