pixie
pixie copied to clipboard
Pixie Python client fails to execute script given by the dashboard
Describe the bug
I get the following error when executing a Pixie script provided by the Pixie dashboard:
[libprotobuf ERROR google/protobuf/wire_format_lite.cc:618] String field 'px.api.vizierpb.StringColumn.data' contains invalid UTF-8 data when parsing a protocol buffer. Use the 'bytes' type if you intend to send raw bytes.
To Reproduce
I am running the following script:
# Copyright 2018- The Pixie Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
''' HTTP Data Tracer
This script traces all HTTP/HTTP2 data on the cluster.
'''
import px
def http_data(start_time: str, source_filter: str, destination_filter: str, num_head: int):
    ''' Build the table of traced HTTP requests shown by this script.
    Reads the 'http_events' table beginning at start_time, resolves the source and
    destination of every request, keeps only the rows whose source contains
    source_filter and whose destination contains destination_filter, and limits
    the result to num_head rows.
    Returns a DataFrame whose columns are in the fixed order listed at the end
    of this function.
    '''
    df = px.DataFrame(table='http_events', start_time=start_time)

    # Add context.
    # NOTE: node and pid are computed here but are not part of the final column
    # projection below.
    df.node = df.ctx['node']
    df.pid = px.upid_to_pid(df.upid)
    df = add_source_dest_columns(df)
    df = add_source_dest_links(df, start_time)

    # Filter out entities as specified by the user.
    df = df[px.contains(df.source, source_filter)]
    df = df[px.contains(df.destination, destination_filter)]

    # Add additional filters below:

    # Restrict number of results.
    df = df.head(num_head)

    # Order columns.
    df = df['time_', 'source', 'destination', 'latency', 'major_version', 'req_path',
            'req_method', 'req_headers', 'req_body', 'req_body_size', 'resp_status',
            'resp_message', 'resp_headers', 'resp_body', 'resp_body_size']
    return df
def add_source_dest_columns(df):
    ''' Add source and destination columns for the HTTP request.
    HTTP requests are traced server-side (trace_role==2), unless the server is
    outside of the cluster in which case the request is traced client-side (trace_role==1).
    When trace_role==2, the HTTP request source is the remote_addr column
    and destination is the pod column. When trace_role==1, the HTTP request
    source is the pod column and the destination is the remote_addr column.
    Input DataFrame must contain trace_role, upid, remote_addr columns.
    The returned DataFrame also carries the pod, namespace, is_source_pod_type
    and is_dest_pod_type columns; rows with an empty source or destination
    are removed.
    '''
    df.pod = df.ctx['pod']
    df.namespace = df.ctx['namespace']

    # If remote_addr is a pod, get its name. If not, use IP address.
    df.ra_pod = px.pod_id_to_pod_name(px.ip_to_pod_id(df.remote_addr))
    df.is_ra_pod = df.ra_pod != ''
    df.ra_name = px.select(df.is_ra_pod, df.ra_pod, df.remote_addr)

    # Record whether each endpoint is a pod. The locally traced endpoint is always
    # taken from the pod column (hence True); the remote endpoint is a pod only
    # when its address resolved to a pod name above.
    df.is_server_tracing = df.trace_role == 2
    df.is_source_pod_type = px.select(df.is_server_tracing, df.is_ra_pod, True)
    df.is_dest_pod_type = px.select(df.is_server_tracing, True, df.is_ra_pod)

    # Set source and destination based on trace_role.
    df.source = px.select(df.is_server_tracing, df.ra_name, df.pod)
    df.destination = px.select(df.is_server_tracing, df.pod, df.ra_name)

    # Filter out messages with empty source / destination.
    df = df[df.source != '']
    df = df[df.destination != '']

    # Drop only the intermediate columns. is_source_pod_type and is_dest_pod_type
    # are deliberately kept: add_source_dest_links lists them as required input.
    df = df.drop(['ra_pod', 'is_ra_pod', 'ra_name', 'is_server_tracing'])
    return df
def add_source_dest_links(df, start_time: str):
    ''' Modifies the source and destination columns to display deeplinks in the UI.
    Clicking on a pod name in either column will run the px/pod script for that pod.
    Clicking on an IP address, will run the px/ip script showing all network connections
    to/from that IP address.
    Input DataFrame must contain source, destination, is_source_pod_type,
    is_dest_pod_type, and namespace columns.
    The helper link columns and the is_source_pod_type / is_dest_pod_type columns
    are dropped before the DataFrame is returned.
    '''
    # Source linking. If source is a pod, link to px/pod. If an IP addr, link to px/ip.
    # Both candidate links are built for every row; px.select then picks one per row.
    df.src_pod_link = px.script_reference(df.source, 'px/pod', {
        'start_time': start_time,
        'pod': df.source
    })
    df.src_link = px.script_reference(df.source, 'px/ip', {
        'start_time': start_time,
        'ip': df.source,
    })
    df.source = px.select(df.is_source_pod_type, df.src_pod_link, df.src_link)

    # Destination linking. If destination is a pod, link to px/pod. If an IP addr,
    # link to px/ip.
    df.dest_pod_link = px.script_reference(df.destination, 'px/pod', {
        'start_time': start_time,
        'pod': df.destination
    })
    df.dest_link = px.script_reference(df.destination, 'px/ip', {
        'start_time': start_time,
        'ip': df.destination,
    })
    df.destination = px.select(df.is_dest_pod_type, df.dest_pod_link, df.dest_link)

    # Remove the temporary link columns and the pod-type flags, which are no
    # longer needed once source and destination have been rewritten.
    df = df.drop(['src_pod_link', 'src_link', 'is_source_pod_type', 'dest_pod_link',
                  'dest_link', 'is_dest_pod_type'])
    return df
# Driver lines added by the reporter so the dashboard script can be run from the
# Python client: call http_data with start_time "-5m", empty source/destination
# filter strings (presumably matching every row - confirm px.contains semantics),
# and a limit of 1000 rows, then emit the resulting table.
df = http_data("-5m", "", "", 1000)
px.display(df)
I took it verbatim from the Pixie dashboard, only adding the last two lines so I could run it as a script from the Python client.
Expected behavior
Scripts taken from the dashboard should run as-is with the Python client.
We released 0.7.0 with this fix. Thanks for raising this!