Understanding memory consumption
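All of the per-line tables below were captured with memory_profiler's `@profile` decorator (visible at the top of each profiled function). A minimal sketch of the instrumentation, assuming a stock memory_profiler install:

```python
# pip install memory_profiler
from memory_profiler import profile

@profile  # emits the per-line "Mem usage / Increment / Occurrences" table
def open_dataset(dataset, **kwargs):
    ...
```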
A random tile (served via gunicorn `./launch_web_service.sh`):
`/api/v1.0/tiles/gaussian/25/10/EPSG:3857/giops_day/votemper/2281521600/0/-5,30/3/3/3.png` produces the following memory profile
Filename: /home/ubuntu/onav-cloud/Ocean-Data-Map-Project/data/__init__.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
9 185.0 MiB 185.0 MiB 1 @hashable_lru
10 @profile
11 def open_dataset(dataset, **kwargs):
12 """Open a dataset.
13
14 Creates a CalculatedData (derived from NetCDFData) instance to handle dataset file
15 access and calculation layer operations.
16 Then, determines the type of model the dataset is from and returns the appropriate
17 Model-derived instance (Nemo, Mercator, Fvcom) with the calculation layer instance
18 as an attribute.
19
20 Note: the returned model object will be LRU-cached internally so frequent calls
21 to open the "same" dataset will have minimal overhead.
22
23 Params:
24 * dataset -- Either a DatasetConfig object, or a string URL for the dataset
25
26 Optional Keyword Arguments:
27 * variable {str or list} -- String or list of strings of variable keys to be loaded
28 (e.g. 'votemper' or ['votemper', 'vosaline']).
29 * timestamp {int} -- Integer value of date/time for requested data (e.g. 2128723200).
30 When loading a range of timestamps, this argument serves as the starting time.
31 * endtime {int} -- Integer value of date/time. This argument is only used when
32 loading a range of timestamps, and should hold the ending time.
33 * nearest_timestamp {bool} -- When true, open_dataset will assume the given
34 starttime (and endtime) do not exactly correspond to a timestamp integer
35 in the dataset, and will perform a binary search to find the nearest timestamp
36 that is less-than-or-equal-to the given starttime (and endtime).
37 """
38 185.0 MiB 0.0 MiB 1 MODEL_CLASSES = {
39 185.0 MiB 0.0 MiB 1 "mercator": Mercator,
40 185.0 MiB 0.0 MiB 1 "nemo": Nemo,
41 185.0 MiB 0.0 MiB 1 "fvcom": Fvcom,
42 }
43
44 185.0 MiB 0.0 MiB 1 if not dataset:
45 raise ValueError("Unknown dataset.")
46
47 185.0 MiB 0.0 MiB 1 try:
48 185.0 MiB 0.0 MiB 1 url = dataset.url
49 185.0 MiB 0.0 MiB 1 calculated_vars = dataset.calculated_variables
50 except AttributeError:
51 url = dataset
52 calculated_vars = {}
53
54 185.0 MiB 0.0 MiB 1 if url is None:
55 raise ValueError("Dataset url is None.")
56
57 185.0 MiB 0.0 MiB 1 try:
58 185.0 MiB 0.0 MiB 1 model_class = MODEL_CLASSES[getattr(dataset, "model_class", "").lower()]
59 except (AttributeError, KeyError):
60 raise ValueError(
61 f"Missing or unrecongized model_class attribute in config for dataset {dataset}"
62 )
63
64 185.0 MiB 0.0 MiB 2 kwargs.update(
65 185.0 MiB 0.0 MiB 1 {
66 185.0 MiB 0.0 MiB 1 "calculated": calculated_vars,
67 185.0 MiB 0.0 MiB 1 "grid_angle_file_url": getattr(dataset, "grid_angle_file_url", ""),
68 185.0 MiB 0.0 MiB 1 "bathymetry_file_url": getattr(dataset, "bathymetry_file_url", ""),
69 185.0 MiB 0.0 MiB 1 "dataset_key": getattr(dataset, "key", ""),
70 }
71 )
72
73 185.1 MiB 0.1 MiB 1 nc_data = CalculatedData(url, **kwargs)
74 185.1 MiB 0.0 MiB 1 return model_class(nc_data)
Filename: /home/ubuntu/onav-cloud/Ocean-Data-Map-Project/data/nearest_grid_point.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
18 192.4 MiB 192.4 MiB 1 @profile
19 def find_nearest_grid_point(lat, lon, latvar, lonvar, n=1):
20 """Find the nearest grid point to a given lat/lon pair.
21
22 Parameters
23 ----------
24 lat : float
25 Latitude value at which to find the nearest grid point.
26 lon : float
27 Longitude value at which to find the nearest grid point.
28 latvar : xarray.DataArray
29 DataArray corresponding to latitude variable.
30 lonvar : xarray.DataArray
31 DataArray corresponding to longitude variable.
32 n : int, optional
33 Number of nearest grid points to return. Default is to return the
34 single closest grid point.
35
36 Returns
37 -------
38 iy, ix, dist_sq
39 A tuple of numpy arrays:
40
41 - ``iy``: the y indices of the nearest grid points
42 - ``ix``: the x indices of the nearest grid points
43 - dist_sq: squared distance
44 """
45
46 # Note the use of the squeeze method: it removes single-dimensional entries
47 # from the shape of an array. For example, in the GIOPS mesh file the
48 # longitude of the U velocity points is defined as an array with shape
49 # (1, 1, 1021, 1442). The squeeze method converts this into the equivalent
50 # array with shape (1021, 1442).
51 192.4 MiB 0.0 MiB 1 latvar = latvar.squeeze()
52 192.4 MiB 0.0 MiB 1 lonvar = lonvar.squeeze()
53
54 192.4 MiB 0.0 MiB 1 rad_factor = pi / 180.0
55 192.4 MiB 0.0 MiB 1 latvals = latvar[:] * rad_factor
56 192.4 MiB 0.0 MiB 1 lonvals = lonvar[:] * rad_factor
57 192.5 MiB 0.1 MiB 1 clat, clon = np.cos(latvals), np.cos(lonvals)
58 192.5 MiB 0.0 MiB 1 slat, slon = np.sin(latvals), np.sin(lonvals)
59 192.5 MiB 0.0 MiB 1 if latvar.ndim == 1:
60 # If latitude and longitude are 1D arrays (as is the case with the
61 # GIOPS forecast data currently pulled from datamart), then we need to
62 # handle this situation in a special manner. The clat array will be of
63 # some size m, say, and the clon array will be of size n. By virtue of
64 # being defined with different dimensions, the product of these two
65 # arrays will be of size (m, n) because xarray will automatically
66 # broadcast the arrays so that the multiplication makes sense to do.
67 # Thus, the array calculated from
68 #
69 # np.ravel(clat * clon)
70 #
71 # will be of size mn. However, the array
72 #
73 # np.ravel(slat)
74 #
75 # will be of size m and this will cause the KDTree() call to fail. To
76 # resolve this issue, we broadcast slat to the appropriate size and
77 # shape.
78 192.5 MiB 0.0 MiB 1 shape = (slat.size, slon.size)
79 192.5 MiB 0.0 MiB 1 slat = np.broadcast_to(slat.values[:, np.newaxis], shape)
80 else:
81 shape = latvar.shape
82 210.0 MiB 0.2 MiB 2 triples = np.array(
83 209.9 MiB 17.4 MiB 1 [np.ravel(clat * clon), np.ravel(clat * slon), np.ravel(slat)]
84 ).transpose()
85
86 244.7 MiB 34.7 MiB 1 kdt = KDTree(triples)
87 269.1 MiB 24.4 MiB 1 dist_sq, iy, ix = _find_index(lat, lon, kdt, shape, n)
88 # The results returned from _find_index are two-dimensional arrays (if
89 # n > 1) because it can handle the case of finding indices closest to
90 # multiple lat/lon locations (i.e., where lat and lon are arrays, not
91 # scalars). Currently, this function is intended only for a single lat/lon,
92 # so we redefine the results as one-dimensional arrays.
93 269.1 MiB 0.0 MiB 1 if n > 1:
94 269.1 MiB 0.0 MiB 1 return iy, ix, dist_sq
95 else:
96 return int(iy), int(ix), dist_sq
Filename: /home/ubuntu/onav-cloud/Ocean-Data-Map-Project/data/mercator.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
85 233.2 MiB 233.2 MiB 1 @profile
86 def __resample(self, lat_in, lon_in, lat_out, lon_out, var, radius=50000):
87 233.2 MiB 0.0 MiB 1 var = np.squeeze(var)
88
89 233.2 MiB 0.0 MiB 1 origshape = var.shape
90
91 233.3 MiB 0.0 MiB 2 data, masked_lat_in, masked_lon_in, output_def = super()._make_resample_data(
92 233.2 MiB 0.0 MiB 1 lat_in, lon_in, lat_out, lon_out, var
93 )
94
95 233.3 MiB 0.0 MiB 1 if len(data.shape) == 3:
96 output = []
97 # multiple depths
98 for d in range(0, data.shape[2]):
99 grid_lat, grid_lon = np.meshgrid(masked_lat_in, masked_lon_in)
100 grid_lat.mask = grid_lon.mask = (
101 data[:, :, d].view(np.ma.MaskedArray).mask.transpose()
102 )
103 input_def = pyresample.geometry.SwathDefinition(
104 lons=grid_lon, lats=grid_lat
105 )
106
107 output.append(
108 self.nc_data.interpolate(
109 input_def, output_def, data[:, :, d].transpose()
110 )
111 )
112
113 output = np.ma.array(output).transpose()
114 else:
115 233.3 MiB 0.0 MiB 1 grid_lat, grid_lon = np.meshgrid(masked_lat_in, masked_lon_in)
116 233.3 MiB 0.0 MiB 2 grid_lat.mask = grid_lon.mask = data.view(
117 233.3 MiB 0.0 MiB 1 np.ma.MaskedArray
118 ).mask.transpose()
119
120 233.3 MiB 0.0 MiB 2 input_def = pyresample.geometry.SwathDefinition(
121 233.3 MiB 0.0 MiB 1 lons=grid_lon, lats=grid_lat
122 )
123
124 233.3 MiB 0.0 MiB 1 output = self.nc_data.interpolate(input_def, output_def, data.transpose())
125
126 233.3 MiB 0.0 MiB 1 if len(origshape) == 4:
127 # un-collapse time and depth axes and
128 # move axes back to original positions.
129 output = np.rollaxis(output, -1).reshape(
130 (
131 origshape[0], # time
132 origshape[1], # depth
133 output.shape[0], # lat
134 output.shape[1], # lon
135 )
136 )
137
138 233.3 MiB 0.0 MiB 1 return np.squeeze(output)
Filename: /home/ubuntu/onav-cloud/Ocean-Data-Map-Project/data/mercator.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
185 192.4 MiB 192.4 MiB 1 @profile
186 def get_point(
187 self,
188 latitude,
189 longitude,
190 depth,
191 variable,
192 starttime,
193 endtime=None,
194 return_depth=False,
195 ):
196
197 224.2 MiB 31.8 MiB 1 miny, maxy, minx, maxx, radius = self.__bounding_box(latitude, longitude, 10)
198
199 224.2 MiB 0.0 MiB 1 if not hasattr(latitude, "__len__"):
200 latitude = np.array([latitude])
201 longitude = np.array([longitude])
202
203 224.2 MiB 0.0 MiB 1 var = self.nc_data.get_dataset_variable(variable)
204
205 224.2 MiB 0.0 MiB 1 starttime_idx = self.nc_data.timestamp_to_time_index(starttime)
206 224.2 MiB 0.0 MiB 1 time_slice = slice(starttime_idx, starttime_idx + 1) # slice only 1 element
207 224.2 MiB 0.0 MiB 1 if endtime is not None: # we have a range of times
208 endtime_idx = self.nc_data.timestamp_to_time_index(endtime)
209 time_slice = slice(starttime_idx, endtime_idx + 1)
210
211 time_duration = endtime_idx - starttime_idx # how many time values we have
212
213 224.2 MiB 0.0 MiB 1 depth_value = None
214 224.2 MiB 0.0 MiB 1 res = None
215 224.2 MiB 0.0 MiB 1 if depth == "bottom":
216 d = var[time_slice, :, miny:maxy, minx:maxx].values
217
218 d = np.rollaxis(d, 0, 4) # roll time to back
219 # compress lat, lon, time along depth axis
220 reshaped = np.ma.masked_invalid(d.reshape([d.shape[0], -1]))
221
222 # Find the bottom data values along depth axis.
223 edges = np.array(np.ma.notmasked_edges(reshaped, axis=0))
224 depths = edges[1, 0, :]
225 indices = edges[1, 1, :]
226
227 data = np.ma.MaskedArray(
228 np.zeros(d.shape[1:]), # copy lat lon and time axis shapes
229 mask=True,
230 dtype=d.dtype,
231 )
232 data[np.unravel_index(indices, data.shape)] = reshaped[depths, indices]
233
234 # Roll time axis back to the front
235 data = np.rollaxis(data, 2, 0)
236
237 res = self.__resample(
238 self.latvar[miny:maxy],
239 self.lonvar[minx:maxx],
240 [latitude],
241 [longitude],
242 data,
243 radius,
244 )
245
246 if return_depth:
247 depth_values = np.ma.MaskedArray(
248 np.zeros(d.shape[1:]), mask=True, dtype=self.depths.dtype
249 )
250
251 depth_values[
252 np.unravel_index(indices, depth_values.shape)
253 ] = self.depths[depths]
254
255 dep = self.__resample(
256 self.latvar[miny:maxy],
257 self.lonvar[minx:maxx],
258 latitude,
259 longitude,
260 np.reshape(depth_values, data.shape),
261 radius,
262 )
263
264 else:
265 224.2 MiB 0.0 MiB 1 if len(var.shape) == 4:
266 233.2 MiB 9.0 MiB 1 data = var[time_slice, int(depth), miny:maxy, minx:maxx]
267 else:
268 data = var[time_slice, miny:maxy, minx:maxx]
269
270 233.3 MiB 0.1 MiB 2 res = self.__resample(
271 233.2 MiB 0.0 MiB 1 self.latvar[miny:maxy],
272 233.2 MiB 0.0 MiB 1 self.lonvar[minx:maxx],
273 233.2 MiB 0.0 MiB 1 latitude,
274 233.2 MiB 0.0 MiB 1 longitude,
275 233.2 MiB 0.0 MiB 1 data.values,
276 233.2 MiB 0.0 MiB 1 radius,
277 )
278
279 233.3 MiB 0.0 MiB 1 if return_depth:
280 depth_value = self.depths[int(depth)]
281 depth_value = np.tile(depth_value, len(latitude))
282 if endtime is not None:
283 depth_value = np.array([depth_value] * time_duration)
284
285 233.3 MiB 0.0 MiB 1 if return_depth:
286 return res, depth_value
287 233.3 MiB 0.0 MiB 1 return res
Filename: /home/ubuntu/onav-cloud/Ocean-Data-Map-Project/data/model.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
111 192.1 MiB 192.1 MiB 1 @profile
112 def get_area(
113 self,
114 area,
115 depth,
116 time,
117 variable,
118 interp,
119 radius,
120 neighbours,
121 return_depth=False,
122 ):
123 192.1 MiB 0.0 MiB 1 try:
124 192.1 MiB 0.0 MiB 1 latitude = area[0, :].ravel() # do we really need this slicing `:` BS?
125 192.1 MiB 0.0 MiB 1 longitude = area[1, :].ravel()
126 except IndexError:
127 latitude = area[0].ravel()
128 longitude = area[1].ravel()
129
130 192.1 MiB 0.0 MiB 1 self.nc_data.interp = interp
131 192.1 MiB 0.0 MiB 1 self.nc_data.radius = radius
132 192.1 MiB 0.0 MiB 1 self.nc_data.neighbours = neighbours
133
134 192.1 MiB 0.0 MiB 1 if return_depth:
135 a, d = self.get_point(
136 latitude, longitude, depth, variable, time, return_depth=return_depth
137 )
138 return numpy.reshape(a, area.shape[1:]), numpy.reshape(d, area.shape[1:])
139 233.3 MiB 41.1 MiB 2 a = self.get_point(
140 192.1 MiB 0.0 MiB 1 latitude, longitude, depth, variable, time, return_depth=return_depth
141 )
142 233.3 MiB 0.0 MiB 1 return numpy.reshape(a, area.shape[1:])
Filename: /home/ubuntu/onav-cloud/Ocean-Data-Map-Project/plotting/tile.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
168 177.2 MiB 177.2 MiB 1 @profile
169 def plot(projection, x, y, z, args):
170 184.0 MiB 6.7 MiB 1 lat, lon = get_latlon_coords(projection, x, y, z)
171 184.0 MiB 0.0 MiB 1 if len(lat.shape) == 1:
172 184.8 MiB 0.8 MiB 1 lat, lon = np.meshgrid(lat, lon)
173
174 184.8 MiB 0.0 MiB 1 dataset_name = args.get("dataset")
175 184.8 MiB 0.0 MiB 1 config = DatasetConfig(dataset_name)
176 184.8 MiB 0.0 MiB 1 variable = args.get("variable")
177
178 184.8 MiB 0.0 MiB 1 variable = variable.split(",")
179
180 184.8 MiB 0.0 MiB 1 depth = args.get("depth")
181
182 184.8 MiB 0.0 MiB 1 scale = args.get("scale")
183 184.8 MiB 0.0 MiB 5 scale = [float(component) for component in scale.split(",")]
184
185 184.8 MiB 0.0 MiB 1 time = args.get("time")
186
187 184.8 MiB 0.0 MiB 1 data = []
188 191.9 MiB 7.1 MiB 1 with open_dataset(config, variable=variable, timestamp=time) as dataset:
189
190 233.3 MiB 0.0 MiB 2 for v in variable:
191 233.3 MiB 0.0 MiB 2 data.append(
192 233.3 MiB 41.1 MiB 2 dataset.get_area(
193 192.1 MiB 0.2 MiB 1 np.array([lat, lon]),
194 192.1 MiB 0.0 MiB 1 depth,
195 192.1 MiB 0.0 MiB 1 time,
196 192.1 MiB 0.0 MiB 1 v,
197 192.1 MiB 0.0 MiB 1 args.get("interp"),
198 192.1 MiB 0.0 MiB 1 args.get("radius"),
199 192.1 MiB 0.0 MiB 1 args.get("neighbours"),
200 )
201 )
202
203 233.3 MiB 0.0 MiB 1 vc = config.variable[dataset.variables[variable[0]]]
204 233.3 MiB 0.0 MiB 1 variable_name = vc.name
205 233.3 MiB 0.0 MiB 1 variable_unit = vc.unit
206 233.3 MiB 0.0 MiB 1 cmap = colormap.find_colormap(variable_name)
207
208 233.3 MiB 0.0 MiB 1 if depth != "bottom":
209 233.3 MiB 0.0 MiB 1 depthm = dataset.depths[depth]
210 else:
211 depthm = 0
212
213 233.3 MiB 0.0 MiB 1 if len(data) == 1:
214 233.3 MiB 0.0 MiB 1 data = data[0]
215
216 233.3 MiB 0.0 MiB 1 if len(data) == 2:
217 data = np.sqrt(data[0] ** 2 + data[1] ** 2)
218 cmap = colormap.colormaps.get("speed")
219
220 233.3 MiB 0.0 MiB 1 data = data.transpose()
221 233.3 MiB 0.0 MiB 1 xpx = x * 256
222 233.3 MiB 0.0 MiB 1 ypx = y * 256
223
224 # Mask out any topography if we're below the vector-tile threshold
225 233.3 MiB 0.0 MiB 1 if z < 8:
226 233.3 MiB 0.0 MiB 2 with Dataset(
227 233.3 MiB 0.0 MiB 1 current_app.config["ETOPO_FILE"] % (projection, z), "r"
228 233.3 MiB 0.0 MiB 1 ) as dataset:
229 233.3 MiB 0.0 MiB 1 bathymetry = dataset["z"][ypx : (ypx + 256), xpx : (xpx + 256)]
230
231 233.4 MiB 0.1 MiB 1 bathymetry = gaussian_filter(bathymetry, 0.5)
232
233 233.4 MiB 0.0 MiB 1 data[np.where(bathymetry > -depthm)] = np.ma.masked
234
235 233.4 MiB 0.0 MiB 2 sm = matplotlib.cm.ScalarMappable(
236 233.4 MiB 0.0 MiB 1 matplotlib.colors.Normalize(vmin=scale[0], vmax=scale[1]), cmap=cmap
237 )
238
239 233.5 MiB 0.1 MiB 1 img = sm.to_rgba(np.ma.masked_invalid(np.squeeze(data)))
240 233.5 MiB 0.0 MiB 1 im = Image.fromarray((img * 255.0).astype(np.uint8))
241
242 233.5 MiB 0.0 MiB 1 buf = BytesIO()
243 233.8 MiB 0.2 MiB 1 im.save(buf, format="PNG", optimize=True)
244 233.8 MiB 0.0 MiB 1 return buf
Filename: /home/ubuntu/onav-cloud/Ocean-Data-Map-Project/routes/api_v1_0.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
895 177.0 MiB 177.0 MiB 1 @bp_v1_0.route(
896 "/api/v1.0/tiles/<string:interp>/<int:radius>/<int:neighbours>/<string:projection>/<string:dataset>/<string:variable>/<int:time>/<string:depth>/<string:scale>/<int:zoom>/<int:x>/<int:y>.png" # noqa: E501
897 )
898 @profile
899 def tile_v1_0(
900 projection: str,
901 interp: str,
902 radius: int,
903 neighbours: int,
904 dataset: str,
905 variable: str,
906 time: int,
907 depth: str,
908 scale: str,
909 zoom: int,
910 x: int,
911 y: int,
912 ):
913 """
914 Produces the map data tiles
915 """
916
917 177.0 MiB 0.0 MiB 1 cache_dir = current_app.config["CACHE_DIR"]
918 177.0 MiB 0.0 MiB 1 f = os.path.join(cache_dir, request.path[1:])
919
920 # Check if the tile/image is cached and send it
921 177.0 MiB 0.1 MiB 1 if _is_cache_valid(dataset, f):
922 return send_file(f, mimetype="image/png", cache_timeout=MAX_CACHE)
923 # Render a new tile/image, then cache and send it
924
925 177.0 MiB 0.0 MiB 1 if depth != "bottom" and depth != "all":
926 177.0 MiB 0.0 MiB 1 depth = int(depth)
927
928 233.8 MiB 56.7 MiB 2 img = plotting.tile.plot(
929 177.0 MiB 0.0 MiB 1 projection,
930 177.0 MiB 0.0 MiB 1 x,
931 177.0 MiB 0.0 MiB 1 y,
932 177.0 MiB 0.0 MiB 1 zoom,
933 177.0 MiB 0.0 MiB 1 {
934 177.0 MiB 0.0 MiB 1 "interp": interp,
935 177.0 MiB 0.0 MiB 1 "radius": radius * 1000,
936 177.0 MiB 0.0 MiB 1 "neighbours": neighbours,
937 177.0 MiB 0.0 MiB 1 "dataset": dataset,
938 177.0 MiB 0.0 MiB 1 "variable": variable,
939 177.0 MiB 0.0 MiB 1 "time": time,
940 177.0 MiB 0.0 MiB 1 "depth": depth,
941 177.0 MiB 0.0 MiB 1 "scale": scale,
942 },
943 )
944
945 234.0 MiB 0.2 MiB 1 return _cache_and_send_img(img, f)
A random transect (served via gunicorn `./launch_web_service.sh`):
`/api/v1.0/plot/?query=%7B%22colormap%22%3A%22default%22%2C%22dataset%22%3A%22giops_day%22%2C%22depth_limit%22%3Afalse%2C%22linearthresh%22%3A200%2C%22path%22%3A%5B%5B53.94162670955251%2C-48.65234553813935%5D%2C%5B44.5103249408252%2C-60.86914241313934%5D%5D%2C%22plotTitle%22%3A%22%22%2C%22scale%22%3A%22-5%2C30%2Cauto%22%2C%22selectedPlots%22%3A%220%2C1%2C1%22%2C%22showmap%22%3Atrue%2C%22surfacevariable%22%3A%22none%22%2C%22time%22%3A2281564800%2C%22type%22%3A%22transect%22%2C%22variable%22%3A%22votemper%22%7D&format=json` produces the following memory profile
Filename: /home/ubuntu/onav-cloud/Ocean-Data-Map-Project/data/__init__.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
9 349.0 MiB 349.0 MiB 1 @hashable_lru
10 @profile
11 def open_dataset(dataset, **kwargs):
12 """Open a dataset.
13
14 Creates a CalculatedData (derived from NetCDFData) instance to handle dataset file
15 access and calculation layer operations.
16 Then, determines the type of model the dataset is from and returns the appropriate
17 Model-derived instance (Nemo, Mercator, Fvcom) with the calculation layer instance
18 as an attribute.
19
20 Note: the returned model object will be LRU-cached internally so frequent calls
21 to open the "same" dataset will have minimal overhead.
22
23 Params:
24 * dataset -- Either a DatasetConfig object, or a string URL for the dataset
25
26 Optional Keyword Arguments:
27 * variable {str or list} -- String or list of strings of variable keys to be loaded
28 (e.g. 'votemper' or ['votemper', 'vosaline']).
29 * timestamp {int} -- Integer value of date/time for requested data (e.g. 2128723200).
30 When loading a range of timestamps, this argument serves as the starting time.
31 * endtime {int} -- Integer value of date/time. This argument is only used when
32 loading a range of timestamps, and should hold the ending time.
33 * nearest_timestamp {bool} -- When true, open_dataset will assume the given
34 starttime (and endtime) do not exactly correspond to a timestamp integer
35 in the dataset, and will perform a binary search to find the nearest timestamp
36 that is less-than-or-equal-to the given starttime (and endtime).
37 """
38 349.0 MiB 0.0 MiB 1 MODEL_CLASSES = {
39 349.0 MiB 0.0 MiB 1 "mercator": Mercator,
40 349.0 MiB 0.0 MiB 1 "nemo": Nemo,
41 349.0 MiB 0.0 MiB 1 "fvcom": Fvcom,
42 }
43
44 349.0 MiB 0.0 MiB 1 if not dataset:
45 raise ValueError("Unknown dataset.")
46
47 349.0 MiB 0.0 MiB 1 try:
48 349.0 MiB 0.0 MiB 1 url = dataset.url
49 349.0 MiB 0.0 MiB 1 calculated_vars = dataset.calculated_variables
50 except AttributeError:
51 url = dataset
52 calculated_vars = {}
53
54 349.0 MiB 0.0 MiB 1 if url is None:
55 raise ValueError("Dataset url is None.")
56
57 349.0 MiB 0.0 MiB 1 try:
58 349.0 MiB 0.0 MiB 1 model_class = MODEL_CLASSES[getattr(dataset, "model_class", "").lower()]
59 except (AttributeError, KeyError):
60 raise ValueError(
61 f"Missing or unrecongized model_class attribute in config for dataset {dataset}"
62 )
63
64 349.0 MiB 0.0 MiB 2 kwargs.update(
65 349.0 MiB 0.0 MiB 1 {
66 349.0 MiB 0.0 MiB 1 "calculated": calculated_vars,
67 349.0 MiB 0.0 MiB 1 "grid_angle_file_url": getattr(dataset, "grid_angle_file_url", ""),
68 349.0 MiB 0.0 MiB 1 "bathymetry_file_url": getattr(dataset, "bathymetry_file_url", ""),
69 349.0 MiB 0.0 MiB 1 "dataset_key": getattr(dataset, "key", ""),
70 }
71 )
72
73 349.0 MiB 0.0 MiB 1 nc_data = CalculatedData(url, **kwargs)
74 349.0 MiB 0.0 MiB 1 return model_class(nc_data)
Filename: /home/ubuntu/onav-cloud/Ocean-Data-Map-Project/plotting/plotter.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
21 349.2 MiB 349.2 MiB 1 @profile
22 def __init__(self, dataset_name: str, query: str, **kwargs):
23 349.2 MiB 0.0 MiB 1 self.dataset_name: str = dataset_name
24 349.2 MiB 0.0 MiB 1 self.dataset_config: DatasetConfig = DatasetConfig(dataset_name)
25 349.2 MiB 0.0 MiB 1 self.query: dict = query
26 349.2 MiB 0.0 MiB 1 self.format: str = kwargs["format"]
27 349.2 MiB 0.0 MiB 1 self.dpi: int = int(kwargs["dpi"])
28 349.2 MiB 0.0 MiB 1 self.size: str = kwargs["size"]
29 349.2 MiB 0.0 MiB 1 self.plotTitle: str = None
30 349.2 MiB 0.0 MiB 1 self.compare: bool = False
31 349.2 MiB 0.0 MiB 1 self.data = None
32 349.2 MiB 0.0 MiB 1 self.time: int = None
33 349.2 MiB 0.0 MiB 1 self.variables = None
34 349.2 MiB 0.0 MiB 1 self.variable_names = None
35 349.2 MiB 0.0 MiB 1 self.variable_units = None
36 349.2 MiB 0.0 MiB 1 self.scale = None
37 349.2 MiB 0.0 MiB 1 self.date_formatter = None
38 # Init interpolation stuff
39 349.2 MiB 0.0 MiB 1 self.interp: str = "gaussian"
40 349.2 MiB 0.0 MiB 1 self.radius: int = 25000 # radius in meters
41 349.2 MiB 0.0 MiB 1 self.neighbours: int = 10
42 349.2 MiB 0.0 MiB 1 self.filetype, self.mime = utils.get_mimetype(kwargs["format"])
43 349.2 MiB 0.0 MiB 2 self.filename: str = utils.get_filename(
44 349.2 MiB 0.0 MiB 1 self.plottype, dataset_name, self.filetype
45 )
Filename: /home/ubuntu/onav-cloud/Ocean-Data-Map-Project/plotting/transect.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
24 349.0 MiB 349.0 MiB 1 @profile
25 def __init__(self, dataset_name: str, query: str, **kwargs):
26 349.0 MiB 0.0 MiB 1 self.plottype: str = "transect"
27 349.2 MiB 0.2 MiB 1 super(TransectPlotter, self).__init__(dataset_name, query, **kwargs)
28
29 # Holds Velocity Plot Type [magnitude, parallel, perpendicular]
30 349.2 MiB 0.0 MiB 1 self.selected_velocity_plots = None
Filename: /home/ubuntu/onav-cloud/Ocean-Data-Map-Project/plotting/plotter.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
68 349.2 MiB 349.2 MiB 1 @abstractmethod
69 @profile
70 def parse_query(self, query: dict):
71
72 349.2 MiB 0.0 MiB 1 self.date_formatter = self.__get_date_formatter(query.get("quantum"))
73
74 349.2 MiB 0.0 MiB 1 self.time = self.__get_time(query.get("time"))
75 349.2 MiB 0.0 MiB 1 self.starttime = self.__get_time(query.get("starttime"))
76 349.2 MiB 0.0 MiB 1 self.endtime = self.__get_time(query.get("endtime"))
77
78 349.2 MiB 0.0 MiB 1 if query.get("interp") is not None:
79 self.interp = query.get("interp")
80 349.2 MiB 0.0 MiB 1 if query.get("radius") is not None:
81 self.radius = query.get("radius") * 1000 # Convert to meters
82 349.2 MiB 0.0 MiB 1 if query.get("neighbours") is not None:
83 self.neighbours = query.get("neighbours")
84
85 349.2 MiB 0.0 MiB 1 self.plotTitle = query.get("plotTitle")
86
87 349.2 MiB 0.0 MiB 1 self.scale = self.__get_scale(query.get("scale"))
88
89 349.2 MiB 0.0 MiB 1 self.variables = self.__get_variables(query.get("variable"))
90
91 # Parse right-view if in compare mode
92 349.2 MiB 0.0 MiB 1 if query.get("compare_to") is not None:
93 self.compare = query.get("compare_to")
94 self.compare["variables"] = self.compare["variable"].split(",")
95
96 if self.compare.get("colormap_diff") == "default":
97 self.compare["colormap_diff"] = "anomaly"
98
99 try:
100 # Variable scale
101 self.compare["scale"] = self.__get_scale(self.compare["scale"])
102 except KeyError:
103 print("Ignoring scale attribute.")
104 try:
105 # Difference plot scale
106 self.compare["scale_diff"] = self.__get_scale(
107 self.compare["scale_diff"]
108 )
109 except KeyError:
110 print("Ignoring scale_diff attribute.")
111
112 349.2 MiB 0.0 MiB 1 self.cmap = self.__get_colormap(query.get("colormap"))
113
114 349.2 MiB 0.0 MiB 1 self.linearthresh = self.__get_linear_threshold(query.get("linearthresh"))
115
116 349.2 MiB 0.0 MiB 1 self.depth = self.__get_depth(query.get("depth"))
117
118 349.2 MiB 0.0 MiB 1 self.showmap = self.__get_showmap(query.get("showmap"))
Filename: /home/ubuntu/onav-cloud/Ocean-Data-Map-Project/plotting/line.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
9 349.2 MiB 349.2 MiB 1 @profile
10 def parse_query(self, query):
11 349.2 MiB 0.0 MiB 1 super(LinePlotter, self).parse_query(query)
12
13 349.2 MiB 0.0 MiB 1 points = query.get("path")
14 349.2 MiB 0.0 MiB 1 if points is None or len(points) == 0:
15 points = ["47 N 52.8317 W", "47 N 42 W"]
16
17 349.2 MiB 0.0 MiB 1 self.points = points
18
19 349.2 MiB 0.0 MiB 1 surface = query.get("surfacevariable")
20 349.2 MiB 0.0 MiB 1 if surface is not None and (surface == "" or surface == "none"):
21 349.2 MiB 0.0 MiB 1 surface = None
22
23 349.2 MiB 0.0 MiB 1 self.surface = surface
24
25 349.2 MiB 0.0 MiB 1 name = query.get("name")
26 349.2 MiB 0.0 MiB 1 if name is None or name == "":
27 349.2 MiB 0.0 MiB 1 p0 = geopy.Point(points[0])
28 349.2 MiB 0.0 MiB 1 p1 = geopy.Point(points[-1])
29 349.2 MiB 0.0 MiB 2 name = gettext("(%0.4f N, %0.4f W) to (%0.4f N, %0.4f W)") % (
30 349.2 MiB 0.0 MiB 1 p0.latitude,
31 349.2 MiB 0.0 MiB 1 p0.longitude,
32 349.2 MiB 0.0 MiB 1 p1.latitude,
33 349.2 MiB 0.0 MiB 1 p1.longitude,
34 )
35
36 349.2 MiB 0.0 MiB 1 self.name = name
Filename: /home/ubuntu/onav-cloud/Ocean-Data-Map-Project/data/__init__.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
9 349.2 MiB 349.2 MiB 1 @hashable_lru
10 @profile
11 def open_dataset(dataset, **kwargs):
12 """Open a dataset.
13
14 Creates a CalculatedData (derived from NetCDFData) instance to handle dataset file
15 access and calculation layer operations.
16 Then, determines the type of model the dataset is from and returns the appropriate
17 Model-derived instance (Nemo, Mercator, Fvcom) with the calculation layer instance
18 as an attribute.
19
20 Note: the returned model object will be LRU-cached internally so frequent calls
21 to open the "same" dataset will have minimal overhead.
22
23 Params:
24 * dataset -- Either a DatasetConfig object, or a string URL for the dataset
25
26 Optional Keyword Arguments:
27 * variable {str or list} -- String or list of strings of variable keys to be loaded
28 (e.g. 'votemper' or ['votemper', 'vosaline']).
29 * timestamp {int} -- Integer value of date/time for requested data (e.g. 2128723200).
30 When loading a range of timestamps, this argument serves as the starting time.
31 * endtime {int} -- Integer value of date/time. This argument is only used when
32 loading a range of timestamps, and should hold the ending time.
33 * nearest_timestamp {bool} -- When true, open_dataset will assume the given
34 starttime (and endtime) do not exactly correspond to a timestamp integer
35 in the dataset, and will perform a binary search to find the nearest timestamp
36 that is less-than-or-equal-to the given starttime (and endtime).
37 """
38 349.2 MiB 0.0 MiB 1 MODEL_CLASSES = {
39 349.2 MiB 0.0 MiB 1 "mercator": Mercator,
40 349.2 MiB 0.0 MiB 1 "nemo": Nemo,
41 349.2 MiB 0.0 MiB 1 "fvcom": Fvcom,
42 }
43
44 349.2 MiB 0.0 MiB 1 if not dataset:
45 raise ValueError("Unknown dataset.")
46
47 349.2 MiB 0.0 MiB 1 try:
48 349.2 MiB 0.0 MiB 1 url = dataset.url
49 349.2 MiB 0.0 MiB 1 calculated_vars = dataset.calculated_variables
50 except AttributeError:
51 url = dataset
52 calculated_vars = {}
53
54 349.2 MiB 0.0 MiB 1 if url is None:
55 raise ValueError("Dataset url is None.")
56
57 349.2 MiB 0.0 MiB 1 try:
58 349.2 MiB 0.0 MiB 1 model_class = MODEL_CLASSES[getattr(dataset, "model_class", "").lower()]
59 except (AttributeError, KeyError):
60 raise ValueError(
61 f"Missing or unrecongized model_class attribute in config for dataset {dataset}"
62 )
63
64 349.2 MiB 0.0 MiB 2 kwargs.update(
65 349.2 MiB 0.0 MiB 1 {
66 349.2 MiB 0.0 MiB 1 "calculated": calculated_vars,
67 349.2 MiB 0.0 MiB 1 "grid_angle_file_url": getattr(dataset, "grid_angle_file_url", ""),
68 349.2 MiB 0.0 MiB 1 "bathymetry_file_url": getattr(dataset, "bathymetry_file_url", ""),
69 349.2 MiB 0.0 MiB 1 "dataset_key": getattr(dataset, "key", ""),
70 }
71 )
72
73 349.4 MiB 0.2 MiB 1 nc_data = CalculatedData(url, **kwargs)
74 349.4 MiB 0.0 MiB 1 return model_class(nc_data)
Filename: /home/ubuntu/onav-cloud/Ocean-Data-Map-Project/data/nearest_grid_point.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
18 349.4 MiB 349.4 MiB 1 @profile
19 def find_nearest_grid_point(lat, lon, latvar, lonvar, n=1):
20 """Find the nearest grid point to a given lat/lon pair.
21
22 Parameters
23 ----------
24 lat : float
25 Latitude value at which to find the nearest grid point.
26 lon : float
27 Longitude value at which to find the nearest grid point.
28 latvar : xarray.DataArray
29 DataArray corresponding to latitude variable.
30 lonvar : xarray.DataArray
31 DataArray corresponding to longitude variable.
32 n : int, optional
33 Number of nearest grid points to return. Default is to return the
34 single closest grid point.
35
36 Returns
37 -------
38 iy, ix, dist_sq
39 A tuple of numpy arrays:
40
41 - ``iy``: the y indices of the nearest grid points
42 - ``ix``: the x indices of the nearest grid points
43 - dist_sq: squared distance
44 """
45
46 # Note the use of the squeeze method: it removes single-dimensional entries
47 # from the shape of an array. For example, in the GIOPS mesh file the
48 # longitude of the U velocity points is defined as an array with shape
49 # (1, 1, 1021, 1442). The squeeze method converts this into the equivalent
50 # array with shape (1021, 1442).
51 349.4 MiB 0.0 MiB 1 latvar = latvar.squeeze()
52 349.4 MiB 0.0 MiB 1 lonvar = lonvar.squeeze()
53
54 349.4 MiB 0.0 MiB 1 rad_factor = pi / 180.0
55 349.4 MiB 0.0 MiB 1 latvals = latvar[:] * rad_factor
56 349.4 MiB 0.0 MiB 1 lonvals = lonvar[:] * rad_factor
57 349.4 MiB 0.0 MiB 1 clat, clon = np.cos(latvals), np.cos(lonvals)
58 349.4 MiB 0.0 MiB 1 slat, slon = np.sin(latvals), np.sin(lonvals)
59 349.4 MiB 0.0 MiB 1 if latvar.ndim == 1:
60 # If latitude and longitude are 1D arrays (as is the case with the
61 # GIOPS forecast data currently pulled from datamart), then we need to
62 # handle this situation in a special manner. The clat array will be of
63 # some size m, say, and the clon array will be of size n. By virtue of
64 # being defined with different dimensions, the product of these two
65 # arrays will be of size (m, n) because xarray will automatically
66 # broadcast the arrays so that the multiplication makes sense to do.
67 # Thus, the array calculated from
68 #
69 # np.ravel(clat * clon)
70 #
71 # will be of size mn. However, the array
72 #
73 # np.ravel(slat)
74 #
75 # will be of size m and this will cause the KDTree() call to fail. To
76 # resolve this issue, we broadcast slat to the appropriate size and
77 # shape.
78 349.4 MiB 0.0 MiB 1 shape = (slat.size, slon.size)
79 349.4 MiB 0.0 MiB 1 slat = np.broadcast_to(slat.values[:, np.newaxis], shape)
80 else:
81 shape = latvar.shape
82 349.4 MiB 0.0 MiB 2 triples = np.array(
83 349.4 MiB 0.0 MiB 1 [np.ravel(clat * clon), np.ravel(clat * slon), np.ravel(slat)]
84 ).transpose()
85
86 349.4 MiB 0.0 MiB 1 kdt = KDTree(triples)
87 349.4 MiB 0.0 MiB 1 dist_sq, iy, ix = _find_index(lat, lon, kdt, shape, n)
88 # The results returned from _find_index are two-dimensional arrays (if
89 # n > 1) because it can handle the case of finding indices closest to
90 # multiple lat/lon locations (i.e., where lat and lon are arrays, not
91 # scalars). Currently, this function is intended only for a single lat/lon,
92 # so we redefine the results as one-dimensional arrays.
93 349.4 MiB 0.0 MiB 1 if n > 1:
94 349.4 MiB 0.0 MiB 1 return iy, ix, dist_sq
95 else:
96 return int(iy), int(ix), dist_sq
Filename: /home/ubuntu/onav-cloud/Ocean-Data-Map-Project/data/mercator.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
85 349.4 MiB 349.4 MiB 1 @profile
86 def __resample(self, lat_in, lon_in, lat_out, lon_out, var, radius=50000):
87 349.4 MiB 0.0 MiB 1 var = np.squeeze(var)
88
89 349.4 MiB 0.0 MiB 1 origshape = var.shape
90
91 349.4 MiB 0.0 MiB 2 data, masked_lat_in, masked_lon_in, output_def = super()._make_resample_data(
92 349.4 MiB 0.0 MiB 1 lat_in, lon_in, lat_out, lon_out, var
93 )
94
95 349.4 MiB 0.0 MiB 1 if len(data.shape) == 3:
96 349.4 MiB 0.0 MiB 1 output = []
97 # multiple depths
98 349.4 MiB 0.0 MiB 51 for d in range(0, data.shape[2]):
99 349.4 MiB 0.0 MiB 50 grid_lat, grid_lon = np.meshgrid(masked_lat_in, masked_lon_in)
100 349.4 MiB 0.0 MiB 50 grid_lat.mask = grid_lon.mask = (
101 349.4 MiB 0.0 MiB 50 data[:, :, d].view(np.ma.MaskedArray).mask.transpose()
102 )
103 349.4 MiB 0.0 MiB 100 input_def = pyresample.geometry.SwathDefinition(
104 349.4 MiB 0.0 MiB 50 lons=grid_lon, lats=grid_lat
105 )
106
107 349.4 MiB 0.0 MiB 100 output.append(
108 349.4 MiB 0.0 MiB 100 self.nc_data.interpolate(
109 349.4 MiB 0.0 MiB 50 input_def, output_def, data[:, :, d].transpose()
110 )
111 )
112
113 349.4 MiB 0.0 MiB 1 output = np.ma.array(output).transpose()
114 else:
115 grid_lat, grid_lon = np.meshgrid(masked_lat_in, masked_lon_in)
116 grid_lat.mask = grid_lon.mask = data.view(
117 np.ma.MaskedArray
118 ).mask.transpose()
119
120 input_def = pyresample.geometry.SwathDefinition(
121 lons=grid_lon, lats=grid_lat
122 )
123
124 output = self.nc_data.interpolate(input_def, output_def, data.transpose())
125
126 349.4 MiB 0.0 MiB 1 if len(origshape) == 4:
127 # un-collapse time and depth axes and
128 # move axes back to original positions.
129 output = np.rollaxis(output, -1).reshape(
130 (
131 origshape[0], # time
132 origshape[1], # depth
133 output.shape[0], # lat
134 output.shape[1], # lon
135 )
136 )
137
138 349.4 MiB 0.0 MiB 1 return np.squeeze(output)
Filename: /home/ubuntu/onav-cloud/Ocean-Data-Map-Project/plotting/transect.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
57 349.2 MiB 349.2 MiB 1 @profile
58 def load_data(self):
59 349.2 MiB 0.0 MiB 1 vars_to_load = self.variables
60 349.2 MiB 0.0 MiB 1 if self.surface:
61 vars_to_load.append(self.surface)
62
63 349.4 MiB 0.2 MiB 2 with open_dataset(
64 349.2 MiB 0.0 MiB 1 self.dataset_config, timestamp=self.time, variable=vars_to_load
65 349.4 MiB 0.0 MiB 1 ) as dataset:
66
67 349.4 MiB 0.0 MiB 2 for idx, v in enumerate(self.variables):
68 349.4 MiB 0.0 MiB 1 var = dataset.variables[v]
69 349.4 MiB 0.0 MiB 1 if not (set(var.dimensions) & set(dataset.nc_data.depth_dimensions)):
70 for potential in dataset.variables:
71 if potential in self.variables:
72 continue
73 pot = dataset.variables[potential]
74 if set(pot.dimensions) & set(dataset.nc_data.depth_dimensions):
75 if len(pot.dimensions) > 3:
76 self.variables[idx] = potential.key
77
78 349.4 MiB 0.0 MiB 1 value = parallel = perpendicular = magnitude = None
79
80 349.4 MiB 0.0 MiB 1 variable_names = self.get_variable_names(dataset, self.variables)
81 349.4 MiB 0.0 MiB 1 variable_units = self.get_variable_units(dataset, self.variables)
82
83 # Load data sent from primary/Left Map
84 349.4 MiB 0.0 MiB 1 if len(self.variables) > 1:
85 # Only velocity has 2 variables
86 v = []
87 for name in self.variables:
88 v.append(dataset.variables[name])
89
90 distances, times, lat, lon, bearings = geo.path_to_points(
91 self.points, 100
92 )
93 # Calculate vector components
94 transect_pts, distance, x, dep = dataset.get_path_profile(
95 self.points, self.variables[0], self.time, numpoints=100
96 )
97 transect_pts, distance, y, dep = dataset.get_path_profile(
98 self.points, self.variables[1], self.time, numpoints=100
99 )
100
101 r = np.radians(np.subtract(90, bearings))
102 theta = np.arctan2(y, x) - r
103 magnitude = np.sqrt(x**2 + y**2)
104
105 parallel = magnitude * np.cos(theta)
106 perpendicular = magnitude * np.sin(theta)
107
108 else:
109 # Get data for one variable
110 349.4 MiB 0.0 MiB 2 transect_pts, distance, value, dep = dataset.get_path_profile(
111 349.4 MiB 0.0 MiB 1 self.points, self.variables[0], self.time
112 )
113
114 349.4 MiB 0.0 MiB 1 if len(self.variables) == 2:
115 variable_names = [
116 self.get_vector_variable_name(dataset, self.variables)
117 ]
118 variable_units = [
119 self.get_vector_variable_unit(dataset, self.variables)
120 ]
121
122 # If a colourmap has not been manually specified by the
123 # Navigator...
124 349.4 MiB 0.0 MiB 1 if self.cmap is None:
125 349.4 MiB 0.0 MiB 1 self.cmap = colormap.find_colormap(variable_names[0])
126
127 349.4 MiB 0.0 MiB 1 self.iso_timestamp = dataset.nc_data.timestamp_to_iso_8601(self.time)
128
129 349.4 MiB 0.0 MiB 1 self.depth = dep
130 349.4 MiB 0.0 MiB 1 self.depth_unit = "m"
131
132 349.4 MiB 0.0 MiB 1 self.transect_data = {
133 349.4 MiB 0.0 MiB 1 "points": transect_pts,
134 349.4 MiB 0.0 MiB 1 "distance": distance,
135 349.4 MiB 0.0 MiB 1 "data": value,
136 349.4 MiB 0.0 MiB 1 "name": variable_names[0],
137 349.4 MiB 0.0 MiB 1 "unit": variable_units[0],
138 349.4 MiB 0.0 MiB 1 "parallel": parallel,
139 349.4 MiB 0.0 MiB 1 "perpendicular": perpendicular,
140 349.4 MiB 0.0 MiB 1 "magnitude": magnitude,
141 }
142
143 349.4 MiB 0.0 MiB 1 if self.surface:
144 surface_pts, surface_dist, _, surface_value = dataset.get_path(
145 self.points, 0, self.surface, self.time
146 )
147 vc = self.dataset_config.variable[dataset.variables[self.surface]]
148 surface_unit = vc.unit
149 surface_name = vc.name
150 surface_value = np.multiply(surface_value, surface_factor)
151
152 self.surface_data = {
153 "config": vc,
154 "points": surface_pts,
155 "distance": surface_dist,
156 "data": surface_value,
157 "name": surface_name,
158 "unit": surface_unit,
159 }
160
161 # Load data sent from Right Map (if in compare mode)
162 349.4 MiB 0.0 MiB 1 if self.compare:
163
164 def interpolate_depths(data, depth_in, depth_out):
165 output = []
166 for i in range(0, depth_in.shape[0]):
167 f = interp1d(
168 depth_in[i],
169 data[:, i],
170 bounds_error=False,
171 assume_sorted=True,
172 )
173 output.append(f(depth_out[i].view(np.ma.MaskedArray).filled()))
174
175 return np.ma.masked_invalid(output).transpose()
176
177 self.compare_config = DatasetConfig(self.compare["dataset"])
178 self.compare["time"] = int(self.compare["time"])
179 with open_dataset(
180 self.compare_config,
181 timestamp=self.compare["time"],
182 variable=self.compare["variables"],
183 ) as dataset:
184 self.compare["iso_timestamp"] = dataset.nc_data.timestamp_to_iso_8601(
185 self.compare["time"]
186 )
187
188 # 1 variable
189 if len(self.compare["variables"]) == 1:
190
191 # Get and store the "nicely formatted" string for the variable name
192 self.compare["name"] = self.get_variable_names(
193 dataset, self.compare["variables"]
194 )[0]
195
196 # Find correct colourmap
197 if self.compare["colormap"] == "default":
198 self.compare["colormap"] = colormap.find_colormap(
199 self.compare["name"]
200 )
201 else:
202 self.compare["colormap"] = colormap.find_colormap(
203 self.compare["colormap"]
204 )
205
206 (
207 climate_points,
208 climate_distance,
209 climate_data,
210 cdep,
211 ) = dataset.get_path_profile(
212 self.points, self.compare["variables"][0], self.compare["time"]
213 )
214
215 self.compare["unit"] = dataset.variables[
216 self.compare["variables"][0]
217 ].unit
218 self.__fill_invalid_shift(climate_data)
219
220 if (self.depth.shape != cdep.shape) or (self.depth != cdep).any():
221 # Need to interpolate the depths
222 climate_data = interpolate_depths(
223 climate_data, cdep, self.depth
224 )
225
226 if self.transect_data["data"] is None:
227 self.transect_data["magnitude"] -= climate_data
228 self.transect_data["parallel"] -= climate_data
229 self.transect_data["perpendicular"] -= climate_data
230 else:
231 self.transect_data["compare_data"] = climate_data
232
233 # Velocity variables
234 else:
235 # Get and store the "nicely formatted" string for the variable name
236 self.compare["name"] = self.get_vector_variable_name(
237 dataset, self.compare["variables"]
238 )
239
240 (
241 climate_pts,
242 climate_distance,
243 climate_x,
244 cdep,
245 ) = dataset.get_path_profile(
246 self.points,
247 self.compare["variables"][0],
248 self.compare["time"],
249 numpoints=100,
250 )
251 (
252 climate_pts,
253 climate_distance,
254 climate_y,
255 cdep,
256 ) = dataset.get_path_profile(
257 self.points,
258 self.compare["variables"][0],
259 self.compare["time"],
260 numpoints=100,
261 )
262
263 (
264 climate_distances,
265 ctimes,
266 clat,
267 clon,
268 bearings,
269 ) = geo.path_to_points(self.points, 100)
270
271 r = np.radians(np.subtract(90, bearings))
272 theta = np.arctan2(climate_y, climate_x) - r
273 mag = np.sqrt(climate_x**2 + climate_y**2)
274
275 if np.all(self.depth != cdep):
276 theta = interpolate_depths(theta, cdep, self.depth)
277 self.__fill_invalid_shift(theta)
278 mag = interpolate_depths(mag, cdep, self.depth)
279 self.__fill_invalid_shift(mag)
280
281 self.compare["parallel"] = mag * np.cos(theta)
282 self.compare["perpendicular"] = mag * np.sin(theta)
283
284 """
285 if self.transect_data['parallel'] is None:
286 self.transect_data['data'] -= mag
287 else:
288 self.transect_data['parallel'] -= climate_parallel
289 self.transect_data['perpendicular'] -= climate_perpendicular
290 """
291
292 # Bathymetry
293 349.4 MiB 0.0 MiB 1 with Dataset(current_app.config["BATHYMETRY_FILE"], "r") as dataset:
294 350.4 MiB 1.1 MiB 2 bath_x, bath_y = bathymetry(
295 349.4 MiB 0.0 MiB 1 dataset.variables["y"],
296 349.4 MiB 0.0 MiB 1 dataset.variables["x"],
297 349.4 MiB 0.0 MiB 1 dataset.variables["z"],
298 349.4 MiB 0.0 MiB 1 self.points,
299 )
300
301 350.4 MiB 0.0 MiB 1 self.bathymetry = {"x": bath_x, "y": bath_y}
Filename: /home/ubuntu/onav-cloud/Ocean-Data-Map-Project/plotting/plotter.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
47 349.2 MiB 349.2 MiB 1 @profile
48 def prepare_plot(self):
49 # Extract requested data
50 349.2 MiB 0.0 MiB 1 self.parse_query(self.query)
51 350.4 MiB 1.3 MiB 1 self.load_data()
52
53 350.4 MiB 0.0 MiB 1 return self.data
Filename: /home/ubuntu/onav-cloud/Ocean-Data-Map-Project/plotting/transect.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
515 350.4 MiB 350.4 MiB 1 @profile
516 def plot(self):
517
518 350.5 MiB 0.1 MiB 1 gs, fig, velocity = self.__create_plot_grid()
519
520 # Plot the transect on a map
521 350.5 MiB 0.0 MiB 1 if self.showmap:
522 350.5 MiB 0.0 MiB 1 plt.subplot(gs[0, 0])
523 367.6 MiB 17.1 MiB 1 utils.path_plot(self.transect_data["points"])
524
525 369.9 MiB 0.0 MiB 2 def do_plot(
526 subplots, map_subplot, data, name, cmapLabel, vmin, vmax, units, cmap
527 ):
528 """
529 Args:
530 subplots: a GridSpec object (gs)
531 map_subplot: Row number (Note: don't use consecutive rows to allow for expanding figure height)
532 data: Data to be plotted
533 name: subplot title
534 cmapLabel: label for colourmap legend
535 vmin: minimum value for a variable (grabbed from the lowest value of some data)
536 vmax: maximum value for a variable (grabbed from the highest value of some data)
537 units: units for variable (PSU, Celsius, etc)
538 cmap: colormap for variable
539 """
540
541 369.9 MiB 0.0 MiB 1 plt.subplot(subplots[map_subplot[0], map_subplot[1]])
542
543 370.2 MiB 0.3 MiB 2 divider = self._transect_plot(
544 369.9 MiB 0.0 MiB 1 data, self.depth, name, vmin, vmax, cmapLabel, units, cmap
545 )
546
547 370.2 MiB 0.0 MiB 1 if self.surface:
548 self.__add_surface_plot(divider)
549
550 367.6 MiB 0.0 MiB 1 def find_minmax(scale, data):
551 """
552 Finds and returns the correct min/max values for the variable scale
553 Args:
554 scale: scale for the left or Right Map (self.scale or self.compare['scale])
555 data: transect_data
556 Returns:
557 (min, max)
558 """
559 if scale:
560 return (scale[0], scale[1])
561 else:
562 return (np.amin(data), np.amax(data))
563
564 # Creates and places the plots
565 367.6 MiB 0.0 MiB 1 def velocity_plot():
566
567 Row = 0
568 if self.showmap:
569 Col = 1
570 else:
571 Col = 0
572
573 if self.selected_velocity_plots[0] == 1:
574 do_plot(
575 gs,
576 [Row, Col],
577 self.transect_data["magnitude"],
578 gettext("Magnitude")
579 + gettext(" for ")
580 + self.date_formatter(self.iso_timestamp),
581 gettext("Magnitude"),
582 vmin,
583 vmax,
584 self.transect_data["unit"],
585 self.cmap,
586 )
587 Row += 1
588 if self.selected_velocity_plots[1] == 1:
589 do_plot(
590 gs,
591 [Row, Col],
592 self.transect_data["parallel"],
593 self.transect_data["name"]
594 + " ("
595 + gettext("Parallel")
596 + ")"
597 + gettext(" for ")
598 + self.date_formatter(self.iso_timestamp),
599 gettext("Parallel"),
600 vmin,
601 vmax,
602 self.transect_data["unit"],
603 self.cmap,
604 )
605 Row += 1
606 if self.selected_velocity_plots[2] == 1:
607
608 do_plot(
609 gs,
610 [Row, Col],
611 self.transect_data["perpendicular"],
612 self.transect_data["name"]
613 + " ("
614 + gettext("Perpendicular")
615 + ")"
616 + gettext(" for ")
617 + self.date_formatter(self.iso_timestamp),
618 gettext("Perpendicular"),
619 vmin,
620 vmax,
621 self.transect_data["unit"],
622 self.cmap,
623 )
624
625 # Plot Transects
626 # If in compare mode
627
628 367.6 MiB 0.0 MiB 1 if self.compare:
629 # Velocity has 2 components
630 if velocity:
631 if self.scale:
632 vmin = self.scale[0]
633 vmax = self.scale[1]
634 else:
635 vmin = min(
636 np.amin(self.transect_data["parallel"]),
637 np.amin(self.transect_data["perpendicular"]),
638 )
639 vmax = max(
640 np.amax(self.transect_data["parallel"]),
641 np.amin(self.transect_data["perpendicular"]),
642 )
643 vmin = min(vmin, -vmax)
644 vmax = max(vmax, -vmin)
645
646 # Get colormap for variable
647 if self.showmap:
648 Col = 1
649 else:
650 Col = 0
651
652 do_plot(
653 gs,
654 [0, Col],
655 self.transect_data["parallel"],
656 self.transect_data["name"]
657 + " ("
658 + gettext("Parallel")
659 + ")"
660 + gettext(" for ")
661 + self.date_formatter(self.iso_timestamp),
662 gettext("Parallel"),
663 vmin,
664 vmax,
665 self.transect_data["unit"],
666 self.cmap,
667 )
668 Col += 1
669 do_plot(
670 gs,
671 [0, Col],
672 self.transect_data["perpendicular"],
673 self.transect_data["name"]
674 + " ("
675 + gettext("Perpendicular")
676 + ")"
677 + gettext(" for ")
678 + self.date_formatter(self.iso_timestamp),
679 gettext("Perpendicular"),
680 vmin,
681 vmax,
682 self.transect_data["unit"],
683 self.cmap,
684 )
685
686 if len(self.compare["variables"]) == 2:
687 if self.compare["scale"]:
688 vmin = self.compare["scale"][0]
689 vmax = self.compare["scale"][1]
690 else:
691 vmin = min(
692 np.amin(self.compare["parallel"]),
693 np.amin(self.compare["perpendicular"]),
694 )
695 vmax = max(
696 np.amax(self.compare["parallel"]),
697 np.amin(self.compare["perpendicular"]),
698 )
699 vmin = min(vmin, -vmax)
700 vmax = max(vmax, -vmin)
701
702 # Get colormap for variable
703 cmap = colormap.find_colormap(self.compare["colormap"])
704 if self.showmap:
705 Col = 1
706 else:
707 Col = 0
708 do_plot(
709 gs,
710 [1, Col],
711 self.compare["parallel"],
712 self.transect_data["name"]
713 + " ("
714 + gettext("Parallel")
715 + ")"
716 + gettext(" for ")
717 + self.date_formatter(self.compare["iso_timestamp"]),
718 gettext("Parallel"),
719 vmin,
720 vmax,
721 self.transect_data["unit"],
722 cmap,
723 )
724 Col += 1
725 do_plot(
726 gs,
727 [1, Col],
728 self.compare["perpendicular"],
729 self.transect_data["name"]
730 + " ("
731 + gettext("Perpendicular")
732 + ")"
733 + gettext(" for ")
734 + self.date_formatter(self.compare["iso_timestamp"]),
735 gettext("Perpendicular"),
736 vmin,
737 vmax,
738 self.transect_data["unit"],
739 cmap,
740 )
741
742 else:
743 vmin, vmax = utils.normalize_scale(
744 self.transect_data["data"],
745 self.dataset_config.variable[self.variables[0]],
746 )
747
748 # Render primary/Left Map
749 if self.showmap:
750 Col = 1
751 else:
752 Col = 0
753
754 do_plot(
755 gs,
756 [0, Col],
757 self.transect_data["data"],
758 self.transect_data["name"]
759 + gettext(" for ")
760 + self.date_formatter(self.iso_timestamp),
761 self.transect_data["name"],
762 vmin,
763 vmax,
764 self.transect_data["unit"],
765 self.cmap,
766 )
767
768 # Render Right Map
769 vmin, vmax = utils.normalize_scale(
770 self.transect_data["compare_data"],
771 self.compare_config.variable[",".join(self.compare["variables"])],
772 )
773 if self.showmap:
774 Col = 1
775 else:
776 Col = 0
777
778 do_plot(
779 gs,
780 [1, Col],
781 self.transect_data["compare_data"],
782 self.compare["name"]
783 + gettext(" for ")
784 + self.date_formatter(self.compare["iso_timestamp"]),
785 self.compare["name"],
786 vmin,
787 vmax,
788 self.compare["unit"],
789 self.compare["colormap"],
790 )
791
792 # Show a difference plot if both variables and datasets are the same
793 if self.variables[0] == self.compare["variables"][0]:
794 self.transect_data["difference"] = (
795 self.transect_data["data"] - self.transect_data["compare_data"]
796 )
797 # Calculate variable range
798 if self.compare["scale_diff"] is not None:
799 vmin = self.compare["scale_diff"][0]
800 vmax = self.compare["scale_diff"][1]
801 else:
802 vmin, vmax = find_minmax(
803 self.compare["scale_diff"], self.transect_data["difference"]
804 )
805 vmin = min(vmin, -vmax)
806 vmax = max(vmax, -vmin)
807 if self.showmap:
808 Col = 1
809 else:
810 Col = 0
811 do_plot(
812 gs,
813 [2, Col],
814 self.transect_data["difference"],
815 self.transect_data["name"] + gettext(" Difference"),
816 self.transect_data["name"],
817 vmin,
818 vmax,
819 # Since both variables are the same doesn't matter which view we reference
820 self.transect_data["unit"],
821 # Colormap for difference graphs
822 colormap.find_colormap(self.compare["colormap_diff"]),
823 )
824
825 # Not comparing
826 else:
827 # Velocity has 3 possible components
828 367.6 MiB 0.0 MiB 1 if velocity:
829 if self.scale:
830 vmin = self.scale[0]
831 vmax = self.scale[1]
832 else:
833 vmin = min(
834 np.amin(self.transect_data["magnitude"]),
835 np.amin(self.transect_data["parallel"]),
836 np.amin(self.transect_data["perpendicular"]),
837 )
838 vmax = max(
839 np.amax(self.transect_data["magnitude"]),
840 np.amax(self.transect_data["parallel"]),
841 np.amin(self.transect_data["perpendicular"]),
842 )
843 vmin = min(vmin, -vmax)
844 vmax = max(vmax, -vmin)
845
846 Row = 0
847
848 velocity_plot()
849
850 # All other variables have 1 component
851 else:
852 367.6 MiB 0.0 MiB 1 if self.showmap:
853 367.6 MiB 0.0 MiB 1 Col = 1
854 else:
855 Col = 0
856 367.6 MiB 0.0 MiB 1 if self.scale:
857 vmin = self.scale[0]
858 vmax = self.scale[1]
859 else:
860 367.6 MiB 0.0 MiB 2 vmin, vmax = utils.normalize_scale(
861 367.6 MiB 0.0 MiB 1 self.transect_data["data"],
862 367.6 MiB 0.0 MiB 1 self.dataset_config.variable[self.variables[0]],
863 )
864
865 370.2 MiB 0.0 MiB 2 do_plot(
866 367.6 MiB 0.0 MiB 1 gs,
867 367.6 MiB 0.0 MiB 1 [0, Col],
868 367.6 MiB 0.0 MiB 1 self.transect_data["data"],
869 369.9 MiB 0.0 MiB 3 self.transect_data["name"]
870 367.6 MiB 0.0 MiB 1 + " for "
871 369.9 MiB 2.2 MiB 1 + self.date_formatter(self.iso_timestamp),
872 369.9 MiB 0.0 MiB 1 self.transect_data["name"],
873 369.9 MiB 0.0 MiB 1 vmin,
874 369.9 MiB 0.0 MiB 1 vmax,
875 369.9 MiB 0.0 MiB 1 self.transect_data["unit"],
876 369.9 MiB 0.0 MiB 1 self.cmap,
877 )
878
879 # Figure title
880 370.2 MiB 0.0 MiB 1 if self.plotTitle is None or self.plotTitle == "":
881 370.2 MiB 0.0 MiB 1 fig.suptitle("Transect Data for:\n%s" % (self.name), fontsize=15)
882 else:
883 fig.suptitle(self.plotTitle, fontsize=15)
884
885 # Subplot padding
886 370.3 MiB 0.1 MiB 1 fig.tight_layout(pad=2, w_pad=2, h_pad=2)
887 370.3 MiB 0.0 MiB 1 fig.subplots_adjust(top=0.90 if self.compare else 0.85)
888
889 370.5 MiB 0.2 MiB 1 return super(TransectPlotter, self).plot(fig)
Filename: /home/ubuntu/onav-cloud/Ocean-Data-Map-Project/routes/api_v1_0.py
Line # Mem usage Increment Occurrences Line Contents
=============================================================
581 349.0 MiB 349.0 MiB 1 @bp_v1_0.route("/api/v1.0/plot/", methods=["GET", "POST"])
582 @profile
583 def plot_v1_0():
584 """
585 API Format: /api/v1.0/plot/?query='...'&format
586
587 query = {
588 dataset : Dataset to extract data
589 names :
590 plottitle : Title of Plot (Default if blank)
591 showmap : Include a map of the plots location on the map
592 station : Coordinates of the point/line/area/etc
593 time : Time retrieved data was gathered/modeled
594 type : File / Plot Type (Check Navigator for Possible options)
595 variable : Variable key (e.g. votemper)
596 }
597 **Query must be written in JSON and converted to encodedURI**
598 **Not all components of query are required
599 """
600
601 349.0 MiB 0.0 MiB 1 if request.method == "GET":
602 349.0 MiB 0.0 MiB 1 args = request.args
603 else:
604 args = request.form
605
606 349.0 MiB 0.0 MiB 1 if "query" not in args:
607 raise APIError("Please provide a query.")
608
609 349.0 MiB 0.0 MiB 1 query = json.loads(args.get("query"))
610
611 349.0 MiB 0.0 MiB 1 fmt = args.get("format")
612 349.0 MiB 0.0 MiB 1 if fmt == "json":
613
614 370.5 MiB 0.0 MiB 2 def make_response(data, mime):
615 370.7 MiB 0.2 MiB 1 b64 = base64.encodebytes(data).decode()
616
617 370.7 MiB 0.0 MiB 2 return Response(
618 370.7 MiB 0.0 MiB 1 json.dumps("data:%s;base64,%s" % (mime, b64)),
619 370.7 MiB 0.0 MiB 1 status=200,
620 370.7 MiB 0.0 MiB 1 mimetype="application/json",
621 )
622
623 else:
624
625 def make_response(data, mime):
626 return Response(data, status=200, mimetype=mime)
627
628 349.0 MiB 0.0 MiB 1 dataset = query.get("dataset")
629 349.0 MiB 0.0 MiB 1 plottype = query.get("type")
630
631 349.0 MiB 0.0 MiB 1 options = {
632 349.0 MiB 0.0 MiB 1 "format": fmt,
633 349.0 MiB 0.0 MiB 1 "size": args.get("size", "15x9"),
634 349.0 MiB 0.0 MiB 1 "dpi": args.get("dpi", 72),
635 }
636
637 # Determine which plotter we need.
638 349.0 MiB 0.0 MiB 1 if plottype == "map":
639 plotter = MapPlotter(dataset, query, **options)
640 349.0 MiB 0.0 MiB 1 elif plottype == "transect":
641 349.2 MiB 0.2 MiB 1 plotter = TransectPlotter(dataset, query, **options)
642 elif plottype == "timeseries":
643 plotter = TimeseriesPlotter(dataset, query, **options)
644 elif plottype == "ts":
645 plotter = TemperatureSalinityPlotter(dataset, query, **options)
646 elif plottype == "sound":
647 plotter = SoundSpeedPlotter(dataset, query, **options)
648 elif plottype == "profile":
649 plotter = ProfilePlotter(dataset, query, **options)
650 elif plottype == "hovmoller":
651 plotter = HovmollerPlotter(dataset, query, **options)
652 elif plottype == "observation":
653 plotter = ObservationPlotter(dataset, query, **options)
654 elif plottype == "track":
655 plotter = TrackPlotter(dataset, query, **options)
656 elif plottype == "class4":
657 plotter = Class4Plotter(dataset, query, **options)
658 elif plottype == "stick":
659 plotter = StickPlotter(dataset, query, **options)
660 else:
661 raise APIError("You Have Not Selected a Plot Type - Please Review your Query")
662
663 349.2 MiB 0.0 MiB 1 if "data" in request.args:
664 data = plotter.prepare_plot()
665 return data
666
667 370.5 MiB 21.3 MiB 1 img, mime, filename = plotter.run()
668
669 370.5 MiB 0.0 MiB 1 if img:
670 370.7 MiB 0.0 MiB 1 response = make_response(img, mime)
671 else:
672 raise FAILURE
673
674 370.7 MiB 0.0 MiB 1 if "save" in args:
675 response.headers["Content-Disposition"] = 'attachment; filename="%s"' % filename
676
677 370.7 MiB 0.0 MiB 1 response.cache_control.max_age = 300
678
679 370.7 MiB 0.0 MiB 1 if "data" in args:
680 plotData = {
681 "data": str(resp), # noqa: F821
682 "shape": resp.shape, # noqa: F821
683 "mask": str(resp.mask), # noqa: F821
684 }
685 plotData = json.dumps(plotData)
686 return Response(plotData, status=200, mimetype="application/json")
687
688 370.7 MiB 0.0 MiB 1 return response
Observations:
- Base memory of a Flask worker is 177 MiB (~186 MB), so the minimum memory consumption of the server is 186 MB * nproc * WORKER_THREADS. On my laptop that's 186 * 8 * 1 = 1488 MB.
- For tiles, memory usage is okay except for the `find_nearest_grid_point` function, which allocates a whopping 31.8 MiB (~33.3 MB) on every tiling request. For comparison, the actual netCDF data loaded from disk to render one tile occupies only 9 MiB (~9.4 MB). All of this memory should be reclaimed by the garbage collector once the request returns to the browser, so this isn't a leak; a possible mitigation is sketched below.
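If the bulk of that 31.8 MiB is the nearest-neighbour index being rebuilt from the full lat/lon grid on every request, one mitigation is to build the index once per grid and reuse it across requests. This is a minimal sketch only, assuming a scipy `cKDTree` over cartesian-converted coordinates on a 2D curvilinear grid; the `_get_kdtree` helper, the cache key, and the lack of eviction/locking are all illustrative, not the project's current API:

```python
import numpy as np
from scipy.spatial import cKDTree

# Hypothetical module-level cache mapping a grid identifier (e.g. the
# dataset URL) to a prebuilt KD-tree. Assumes the lat/lon grids are
# immutable for the lifetime of the worker process and that latvar and
# lonvar are arrays of equal shape (curvilinear grid).
_KDTREE_CACHE = {}

def _get_kdtree(grid_key, latvar, lonvar):
    if grid_key not in _KDTREE_CACHE:
        phi = np.deg2rad(np.asarray(latvar[:]).ravel())
        lam = np.deg2rad(np.asarray(lonvar[:]).ravel())
        # 3D cartesian coordinates so that Euclidean distance approximates
        # great-circle distance for nearest-neighbour queries.
        xyz = np.column_stack(
            (np.cos(phi) * np.cos(lam), np.cos(phi) * np.sin(lam), np.sin(phi))
        )
        _KDTREE_CACHE[grid_key] = cKDTree(xyz)
    return _KDTREE_CACHE[grid_key]
```

With the tree cached, each tile request pays only the O(log n) query cost instead of re-allocating the whole index.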
Running a fresh single-threaded server via `mprof run -C -M runserver.py` (to capture all child processes and forks) and navigating to localhost lets a bunch of tiles plot. Note that the base memory usage of a Flask worker under `runserver.py` is higher than under gunicorn due to debug overhead, and that the y-axis is in MiB (1 MiB ≈ 1.05 MB).
[graph: mprof memory profile of the single-threaded runserver.py run, y-axis in MiB]
Using Gunicorn and `mprof run -C -M launch_web_service.sh` to capture all child processes and forks. The y-axis is again in MiB.
[graph: mprof memory profile of the Gunicorn run, y-axis in MiB]
Both of these graphs show memory usage trending higher. I think we need to track this data over a longer period of time on production. I wonder if there's data being left around in memory, held by lingering references that the garbage collector can't clear; one way to check is sketched below.
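One way to test that theory from the debugger is to force a full collection and then ask the GC what still holds suspect objects. A minimal diagnostic sketch; picking `numpy.ma.MaskedArray` as the suspect type is just an example:

```python
import gc
import numpy.ma as ma

gc.collect()  # force a full collection first

# Count live instances of a suspect type; if this number grows request
# over request, something is pinning them.
survivors = [o for o in gc.get_objects() if isinstance(o, ma.MaskedArray)]
print("live MaskedArrays:", len(survivors))

# gc.get_referrers() shows what keeps a survivor alive -- a module-level
# cache, a closure, or a frame held by a traceback, for example.
if survivors:
    for ref in gc.get_referrers(survivors[0])[:5]:
        print(type(ref))
```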
In order to get a better view of system metrics over time, we need to set up our LXD containers to export data to Prometheus, which will be graphed by Grafana.
- https://prometheus.io/docs/visualization/grafana/
The Grafana dashboard should be accessible on the navigator.oceansdata domain but perhaps behind a username/password combo for safety.
In addition to tracking the LXD metrics, our Flask app must be configured to expose data for Prometheus to scrape at defined intervals; a minimal sketch follows.
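For the Flask side, here is a minimal sketch using the `prometheus_client` package; the metric name is a placeholder, and `prometheus_flask_exporter` would be a higher-level alternative:

```python
from flask import Flask
from prometheus_client import Counter, make_wsgi_app
from werkzeug.middleware.dispatcher import DispatcherMiddleware

app = Flask(__name__)

# Mount the Prometheus WSGI app at /metrics; the default registry already
# exports process metrics (RSS, open fds, GC counts) that Grafana can
# graph next to the LXD container data.
app.wsgi_app = DispatcherMiddleware(app.wsgi_app, {"/metrics": make_wsgi_app()})

# Example application-level metric (placeholder name, not existing code).
TILE_REQUESTS = Counter("onav_tile_requests_total", "Tile requests served")
```

Prometheus then scrapes `http://<host>/metrics` at the interval set in its scrape config.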
It would also be incredibly helpful if s3fs weren't so freaking slow... waiting 5 minutes for 1 tile to show is ridiculous.
Deleting the `@hashable_lru` decorator from `open_dataset` results in the following graph. Memory is still increasing, but at a much slower rate.
[graph: mprof memory profile with @hashable_lru removed]
That's quite the improvement! Another good reason to drop the LRU decorator. Any idea what that spike at ~400 s might be?
Matplotlib, I believe.
With the removal of the `hashable_lru` decorator, here is what memory consumption looks like:
[graph: memory consumption after removing hashable_lru]
So we're still seeing some climbing as we draw more pictures; one possible culprit is sketched below.
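One cheap thing to rule out before hunting subtler leaks: figures created through pyplot stay registered in its global figure manager until they are explicitly closed, so a worker that renders many images without closing them climbs exactly like this. A sketch of the pattern (whether the plotters here actually go through pyplot is something I haven't verified):

```python
import io
import matplotlib
matplotlib.use("Agg")  # headless backend for a web worker
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(15, 9), dpi=72)
# ... render the transect/profile/map into `ax` ...

buf = io.BytesIO()
fig.savefig(buf, format="png")

# Without this, pyplot keeps a strong reference to the figure (axes,
# arrays, renderer caches) for the life of the process.
plt.close(fig)
```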
Forced an exception to be raised so I could access the Python debugger; here's a dump of the memory summary at 2.3 GB RSS. All memory numbers below are in bytes.
>>> from operator import itemgetter
>>> from pympler import tracker
>>> mem = tracker.SummaryTracker()
>>> print(sorted(mem.create_summary(), reverse=True, key=itemgetter(2))[:10])
[['str', 192315, 33964177], ['dict', 69995, 23186920], ['numpy.ndarray', 14900, 12189853], ['code', 51652, 9329786], ['numpy.ma.core.MaskedArray', 25, 6407440], ['type', 6551, 6392072], ['list', 42972, 4313624], ['tuple', 51193, 3094440], ['set', 5387, 1953352], ['pint.util.udict', 3643, 903464]]
>>> import pandas as pd
>>> memory = pd.DataFrame(mem.create_summary(), columns=['object', 'number_of_objects', 'memory'])
>>> memory['mem_per_object'] = memory['memory'] / memory['number_of_objects']
>>> print(memory.sort_values('mem_per_object', ascending=False).head(50))
object number_of_objects memory mem_per_object
7282 numpy.ma.core.MaskedArray 25 6407440 256297.600000
13337 matplotlib.colors._ColorMapping 1 36976 36976.000000
5773 pytz.lazy.LazySet.__new__.<locals>.LazySet 2 65984 32992.000000
307 _io.BufferedReader 7 167576 23939.428571
14405 matplotlib.cbook.maxdict 1 4712 4712.000000
5766 pytz.lazy.LazyList.__new__.<locals>.LazyList 2 9232 4616.000000
7194 numpy.random._mt19937.MT19937 2 5248 2624.000000
1082 random.Random 3 7656 2552.000000
4851 random.SystemRandom 1 2552 2552.000000
25761 flask.config.Config 1 2288 2288.000000
13660 matplotlib.RcParams 31 56176 1812.129032
2694 collections.defaultdict 62 94408 1522.709677
4013 urllib3.util.retry._RetryMeta 1 1472 1472.000000
18870 pint.registry.RegistryMeta 5 6264 1252.800000
25918 marshmallow.schema.SchemaMeta 6 7200 1200.000000
12910 matplotlib.docstring._ArtistKwdocLoader 1 1192 1192.000000
309 _io.BufferedWriter 2 2384 1192.000000
7491 pandas._libs.tslibs.strptime.TimeRE 1 1192 1192.000000
6084 numpy.core.numerictypes._typedict 5 5960 1192.000000
298 abc.ABCMeta 459 507496 1105.655773
23149 sqlalchemy.sql.visitors.TraversibleType 228 245680 1077.543860
1190 ast._ABC 5 5320 1064.000000
2818 typing_extensions._TypedDictMeta 1 1064 1064.000000
7415 dateutil.tz._factories._TzStrFactory 1 1064 1064.000000
7412 dateutil.tz._factories._TzSingleton 1 1064 1064.000000
3048 jinja2.nodes.NodeType 69 73416 1064.000000
7606 pandas.core.dtypes.generic.ABCBase 20 21280 1064.000000
23440 sqlalchemy.sql.type_api.VisitableCheckKWArg 1 1064 1064.000000
1857 typing._TypedDictMeta 4 4256 1064.000000
1853 typing.NamedTupleMeta 1 1064 1064.000000
826 string._TemplateMetaclass 1 1064 1064.000000
23239 sqlalchemy.sql.base._MetaOptions 7 7448 1064.000000
4376 sentry_sdk.hub.HubMeta 1 1064 1064.000000
4930 werkzeug.wrappers.base_request._FakeSubclassCheck 1 1064 1064.000000
10638 fsspec.spec._Cached 1 1064 1064.000000
7414 dateutil.tz._factories._TzOffsetFactory 1 1064 1064.000000
23925 sqlalchemy.sql.functions._GenericMeta 68 72352 1064.000000
7509 pandas._libs.tslibs.offsets.OffsetMeta 1 1064 1064.000000
23156 sqlalchemy.sql.visitors._InternalTraversalType 7 7448 1064.000000
4980 werkzeug.wrappers.base_response._FakeSubclassC... 1 1064 1064.000000
23576 sqlalchemy.event.base._EventMeta 13 13832 1064.000000
25676 flask_sqlalchemy.model.DefaultMeta 6 6384 1064.000000
23926 sqlalchemy.util.langhelpers.EnsureKWArgType 3 3192 1064.000000
678 enum.EnumMeta 73 77168 1057.095890
38 _ctypes.PyCSimpleType 30 31272 1042.400000
37 _ctypes.PyCFuncPtrType 19 19568 1029.894737
35 _ctypes.PyCArrayType 11 11056 1005.090909
2318 collections.OrderedDict 622 622464 1000.745981
25 re.Pattern 366 366012 1000.032787
36 _ctypes.PyCPointerType 8 7864 983.000000
>>> from pympler.process import ProcessMemoryInfo
>>> pmi = ProcessMemoryInfo()
>>> print ("Virtual size [Byte]: " + str(pmi.vsz))
Virtual size [Byte]: 2443882496
>>> print(memory.sort_values('memory', ascending=False).head(50))
object number_of_objects memory mem_per_object
0 str 191200 33895806 177.279320
12 dict 69995 23187048 331.267205
52 numpy.ndarray 14900 12189853 818.110940
19 code 51652 9329786 180.627778
7282 numpy.ma.core.MaskedArray 25 6407440 256297.600000
1 type 6551 6392072 975.739887
102 list 42939 4302272 100.194974
5 tuple 51192 3094376 60.446476
202 set 5386 1951088 362.251764
18975 pint.util.udict 3643 903464 248.000000
100 weakref 11908 857376 72.000000
2 int 23354 715632 30.642802
95 builtin_function_or_method 9175 660600 72.000000
2318 collections.OrderedDict 622 622464 1000.745981
165 cell 13685 547400 40.000000
298 abc.ABCMeta 459 507496 1105.655773
143 function (__init__) 3358 456688 136.000000
98 getset_descriptor 6469 414016 64.000000
886 function (<lambda>) 2862 389232 136.000000
25 re.Pattern 366 366012 1000.032787
2319 inspect.Parameter 5590 357760 64.000000
169 frozenset 758 303248 400.063325
96 method_descriptor 3851 277272 72.000000
94 wrapper_descriptor 3751 270072 72.000000
23149 sqlalchemy.sql.visitors.TraversibleType 228 245680 1077.543860
151 property 3225 232200 72.000000
20 float 9280 222720 24.000000
7537 fused_cython_function 811 181664 224.000000
18961 pint.util.UnitsContainer 2711 173504 64.000000
307 _io.BufferedReader 7 167576 23939.428571
147 function (__repr__) 848 115328 136.000000
142 _frozen_importlib.ModuleSpec 2354 112992 48.000000
276 _frozen_importlib_external.SourceFileLoader 2074 99552 48.000000
99 member_descriptor 1539 98496 64.000000
1850 typing._GenericAlias 2026 97248 48.000000
2694 collections.defaultdict 62 94408 1522.709677
525 method 1472 94208 64.000000
47 numpy.ufunc 391 90712 232.000000
994 collections.deque 143 90288 631.384615
2326 cython_function_or_method 431 86200 200.000000
678 enum.EnumMeta 73 77168 1057.095890
3048 jinja2.nodes.NodeType 69 73416 1064.000000
23925 sqlalchemy.sql.functions._GenericMeta 68 72352 1064.000000
18985 pint.util.ParserHelper 932 67104 72.000000
5773 pytz.lazy.LazySet.__new__.<locals>.LazySet 2 65984 32992.000000
2213 functools.partial 725 58000 80.000000
2655 ctypes.CDLL.__init__.<locals>._FuncPtr 296 56832 192.000000
13660 matplotlib.RcParams 31 56176 1812.129032
327 function (__call__) 376 51136 136.000000
152 classmethod 1065 51120 48.000000
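A one-off summary like the dump above shows what is resident, not what is growing. Pympler's tracker can diff successive snapshots, which isolates the types whose counts climb per request; a minimal sketch:

```python
from pympler import tracker

mem = tracker.SummaryTracker()  # baseline, taken once at worker start-up

def log_memory_growth():
    # Call after each request (or every N requests): prints only the
    # delta since the previous call. Types whose counts grow
    # monotonically across calls are the leak candidates.
    mem.print_diff()
```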