Skip to content

Commit

Permalink
list_mptcp_connections chose the wrong mptcpdest
Browse files Browse the repository at this point in the history
because it was comparing values of different types.
For now I encode the failing fields as str instead of UInt64
(dsnraw seems to be affected as well)
See pandas-dev/pandas#25472 for more details.
  • Loading branch information
teto committed Feb 28, 2019
1 parent 7c82b52 commit 8897e7b
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 52 deletions.
29 changes: 6 additions & 23 deletions mptcpanalyzer/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,8 +362,8 @@ def list_subflows(self, mptcpstreamid: MpTcpStreamId):

try:
con = MpTcpConnection.build_from_dataframe(self.data, mptcpstreamid)
self.poutput("mptcp.stream %d has %d subflow(s) (client/server): "
% (mptcpstreamid, len(con.subflows())))
msg = "mptcp.stream %d has %d subflow(s) (client/server): "
self.poutput(msg % (mptcpstreamid, len(con.subflows())))
for sf in con.subflows():
self.poutput("\t%s" % sf)
except mp.MpTcpException as e:
Expand Down Expand Up @@ -526,7 +526,6 @@ def do_tcp_summary(self, args, unknown):
df = self.data

# args.pcapdestinations ?
# print(args)

for dest in ConnectionRoles:
# TODO do it only when needed
Expand Down Expand Up @@ -573,7 +572,7 @@ def do_mptcp_summary(self, args, unknown):
mptcpstream = args.mptcpstream

# args.pcapdestinations ?
print(args)
# print(args)
ret = mptcp_compute_throughput(
self.data, args.mptcpstream, args.destination
)
Expand Down Expand Up @@ -670,7 +669,7 @@ def do_summary_extended(self, args, unknown):
For now it is naive, does not look at retransmissions ?
"""

print("%r" % args)
print("Summary extended resume %r" % args)
df_pcap1 = load_into_pandas(args.pcap1, self.tshark_config)

# to abstract things a bit
Expand Down Expand Up @@ -779,17 +778,6 @@ def do_list_mptcp_connections(self, *args):
self.list_subflows(mptcpstream)
self.poutput("\n")

# def generate_namespace(self) -> argparse.Namespace:
# myNamespace = Namespace()
# myNamespace.toto = self.data
# parser = argparse_completer.ACArgumentParser(
# description="""
# Mptcpanalyzer filters pcaps to keep only tcp packets.
# This may explain why printed packet ids dont map
# """
# )



parser = MpTcpAnalyzerParser(
description="Export a pcap that can be used with wireshark to debug ids"
Expand Down Expand Up @@ -1091,12 +1079,7 @@ def do_load_pcap(self, args):
"""
Load the file as the current one
"""
print(args)
# args = shlex.split(args)
# print(args)
# parser = self.do_load_pcap.argparser
# print(parser)
# args = parser.parse_args(args)

self.poutput("Loading %s" % args.input_file)
self.data = args._dataframes["input_file"]
Expand Down Expand Up @@ -1138,15 +1121,15 @@ def do_plot(self, args, unknown):
# 'converts' the namespace to for the syntax define a dict
dargs = vars(args)

print("%s" % dargs)
# print("%s" % dargs)
dataframes = dargs.pop("_dataframes")
# workaround argparse limitations to set as default both directions
# TODO replace that with an action ?
# destinations=dargs.get("destinations", list(mp.ConnectionRoles))
# dargs.update(destinations=destinations)
# log.debug("Selecting destinations %s" % (destinations,))
# dataframes = plotter.preprocess(**dargs)
print("%s" % args)
print("DO_PLOT %s" % args)
# dataframes = args._dataframes.values()
assert dataframes is not None, "Preprocess must return a list"
# pass unknown_args too ?
Expand Down
25 changes: 20 additions & 5 deletions mptcpanalyzer/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,19 +154,23 @@ def __str__(self):
# :>5d
# TODO should be converted to int instead, would spare some memory
line = ("tcp.stream {s.tcpstreamid:.0f}: {s.tcpclient_ip}:{s.client_port:0>5.0f} "
" <-> {s.tcpserver_ip}:{s.server_port:0>5.0f} ").format(s=self,
" -> {s.tcpserver_ip}:{s.server_port:0>5.0f} ").format(s=self,
# tcpstreamid=self.tcpstreamid
)
return line


# should it ?
@dataclass
# @dataclass
class MpTcpSubflow(TcpConnection):
"""
"""

""" to which mptcp side belongs the tcp server"""
# mptcpdest: ConnectionRoles
# addrid: int = None

def __init__(self, mptcpdest: ConnectionRoles, addrid=None, **kwargs) -> None:
super().__init__(**kwargs)
self.addrid = addrid
Expand Down Expand Up @@ -233,6 +237,7 @@ def __str__(self):


# @dataframe
# @dataclass
class MpTcpConnection:
"""
Holds key characteristics of an MPTCP connection: keys, tokens, subflows
Expand All @@ -241,6 +246,8 @@ class MpTcpConnection:
subflows can be any order
"""
# mptcpstreamid: MpTcpStreamId

def __init__(self,
mptcpstreamid: int,
client_key: int, client_token: int, server_key: int,
Expand Down Expand Up @@ -294,6 +301,8 @@ def subflows(self, mptcpdest: ConnectionRoles = ConnectionRoles.Server):
def build_from_dataframe(ds: pd.DataFrame, mptcpstreamid: MpTcpStreamId) -> 'MpTcpConnection':
"""
Instantiates a class that describes an MPTCP connection
Look for the first 2 packets containing "sendkey"
"""

def get_index_of_non_null_values(serie):
Expand Down Expand Up @@ -345,17 +354,23 @@ def get_index_of_non_null_values(serie):
receiver_token = subflow_ds["recvtok"].iloc[row]

# if we see the token
log.debug("receiver_token %r to compare with server_token %r" % (receiver_token, server_token))
log.debug("Test %s" % (receiver_token == server_token))
mptcpdest = ConnectionRoles.Server if receiver_token == server_token \
else ConnectionRoles.Client

subflow = MpTcpSubflow.create_subflow(
mptcpdest = ConnectionRoles.Server if receiver_token == server_token \
else ConnectionRoles.Client,
mptcpdest = mptcpdest,
tcpstreamid =tcpstreamid,
tcpclient_ip=subflow_ds['ipsrc'].iloc[row],
tcpserver_ip=subflow_ds['ipdst'].iloc[row],
client_port =subflow_ds['sport'].iloc[row],
server_port =subflow_ds['dport'].iloc[row],
addrid =None,
# rcv_token =receiver_token,
)
)

log.debug("Created subflow %s" % subflow)

subflows.append(subflow)

Expand Down
20 changes: 8 additions & 12 deletions mptcpanalyzer/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,21 +430,22 @@ def load_into_pandas(

converters = {f.fullname: f.converter for _, f in config.fields.items() if f.converter}
converters.update({name: f.converter for name, f in per_pcap_artificial_fields.items() if f.converter})
# print("converters\n", converters)

dtypes = {field.fullname: field.type for _, field in config.fields.items() if field.converter is None}
log.debug("Dtypes before load: %s" % dtypes)
log.debug("Converters before load: %s" % converters)
log.debug("Dtypes before load: %s" % (pp.pformat(dtypes)))
log.debug("Converters before load: %s" % (pp.pformat(converters)))

from .pdutils import read_csv_debug
fields = [f.fullname for _, f in config.fields.items()]
# fields = [f.fullname for _, f in config.fields.items()]
# fields =[ "tcp.options.mptcp.sendkey" ]
# data = read_csv_debug(fields,
data = pd.read_csv(
fd,
comment='#',
sep=config.delimiter,
dtype=dtypes,
# usecols = [config.fields["ipsrc"].fullname ],
# config.fields["ipsrc"].fullname
# usecols = [ "tcp.options.mptcp.sendkey" ],
# seems like for now we can't change the default representation apart from converting the column to
# a string !!!
# https://stackoverflow.com/questions/46930201/pandas-to-datetime-is-not-formatting-the-datetime-value-in-the-desired-format
Expand All @@ -456,7 +457,7 @@ def load_into_pandas(
converters=converters,
# float_precision="high", # might be necessary
# nrows=13, # useful for debugging purpose
# chunksize=5, # useful for debugging purpose
# chunksize=1, # useful for debugging purpose
)

log.debug("Finished loading CSV file")
Expand All @@ -472,7 +473,7 @@ def load_into_pandas(
# we want packetid column to survive merges/dataframe transformation so keepit as a column
# TODO remove ? let other functions do it ?
data.set_index("packetid", drop=False, inplace=True)
log.debug("Column names: %s" % data.columns)
# log.debug("Column names: %s" % data.columns)

hashing_fields = [name for name, field in config.fields.items() if field.hash]
log.debug("Hashing over fields %s" % hashing_fields)
Expand All @@ -488,14 +489,9 @@ def load_into_pandas(
except Exception as e:
logging.error("You may need to filter more your pcap to keep only mptcp packets")
raise e
# finally:
# print (data)

log.info("Finished loading dataframe for %s. Size=%d" % (input_file, len(data)))

# print("FINAL_DTYPES")
# log.debug(data.dtypes)
# print(data.head(5))
return data


Expand Down
17 changes: 9 additions & 8 deletions mptcpanalyzer/pdutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def debug_dataframe(
def read_csv_debug(fields, fd, *args, first_try=True, **kwargs):
"""
Help debugging dataframe loading errors (with dtypes/converters)
chunksize: bool,
chunksize: bool,
"""

chunksize = kwargs.get("chunksize")
Expand All @@ -63,22 +63,23 @@ def read_csv_debug(fields, fd, *args, first_try=True, **kwargs):
kwargs.pop("chunksize", None)

for field in fields:
print("TESTING field ", field)
print("TESTING field %s (first_try ? %s ) " % (field, first_try))
print(kwargs.get("dtype")[field])
try:
res = pd.read_csv(
fd,
*args,
usecols=[ field],
**kwargs
)
fd,
*args,
usecols=[ field],
**kwargs
)
if chunksize is not None:
for i, chunk in enumerate(res):
# print("chunk %d" % i)
print(chunk)
except TypeError as e:
# TODO retry with chunksize
if first_try:
kwargs.update({"chunksize":chunksize or 40})
kwargs.update({"chunksize": chunksize or 40})
fd.seek(0)
read_csv_debug([field], fd, *args, first_try=False, **kwargs)
else:
Expand Down
3 changes: 2 additions & 1 deletion mptcpanalyzer/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,10 @@ def mptcp_compute_throughput(
subflow_stats: List[TcpUnidirectionalStats] = []
for tcpstream, subdf in d:
# subdf.iloc[0, subdf.columns.get_loc(_second('abstime'))]
debug_dataframe(subdf, "subdf for stream %d" % tcpstream)
# debug_dataframe(subdf, "subdf for stream %d" % tcpstream)
dest = subdf.iloc[0, subdf.columns.get_loc(_sender('tcpdest'))]
sf_stats = tcp_get_stats(subdf, tcpstream,
# work around pandas issue
ConnectionRoles(dest),
True)

Expand Down
9 changes: 6 additions & 3 deletions mptcpanalyzer/tshark.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,12 @@ def add_mptcp_fields(self, advanced=True):
# remove this one ?
self.add_field("mptcp.expected_token", "expected_token", str, False, False)
self.add_field("mptcp.stream", "mptcpstream", 'UInt64', False, False)
self.add_field("tcp.options.mptcp.sendkey", "sendkey", np.float64, False, True)
self.add_field("tcp.options.mptcp.recvkey", "recvkey", np.float64, False, True)
self.add_field("tcp.options.mptcp.recvtok", "recvtok", np.float64, False, True)

# TODO convert to 'UInt64'
self.add_field("tcp.options.mptcp.sendkey", "sendkey", str, False, True)
self.add_field("tcp.options.mptcp.recvkey", "recvkey", str, False, True)
self.add_field("tcp.options.mptcp.recvtok", "recvtok", str, False, True)

self.add_field("tcp.options.mptcp.datafin.flag", "datafin", 'Int64', False, True)
# this is a list really; can contain "2,4"
self.add_field("tcp.options.mptcp.subtype", "subtype", str, False, True)
Expand Down

0 comments on commit 8897e7b

Please sign in to comment.