Skip to content
Snippets Groups Projects

Minor update to BW test page

Merged Shunan Zhang requested to merge bw-test-minor-update into master
Files
4
@@ -30,7 +30,7 @@ def get_all_event_numbers(args):
return ret
def calculate_similarity_matrix(event_numbers_by_stream):
def get_event_number_matrix(event_numbers_by_stream):
all_event_numbers = set([
evt_no for evt_no_list in event_numbers_by_stream.values()
@@ -48,6 +48,11 @@ def calculate_similarity_matrix(event_numbers_by_stream):
for evt_no in evt_no_list:
df[stream][evt_no] = True
return df
def calculate_similarity_matrix(df):
jaccard = 1 - pairwise_distances(
df.T.to_numpy(), metric='jaccard'
) # Transpose because pairwise_distances compares rows: each stream must be a row, not a column.
@@ -57,6 +62,18 @@ def calculate_similarity_matrix(event_numbers_by_stream):
return jaccard_sim_matrix_df
def calculate_overlap_matrix(df):
    """Return the pairwise conditional overlap between the columns of ``df``.

    Each column of ``df`` is treated as a presence indicator over the rows
    (truthy where the column's stream contains that row's event).  The entry
    in column ``target`` and row ``comparison`` of the result equals::

        sum(df[comparison] * df[target]) / sum(df[target])

    i.e. the fraction of ``target``'s entries that are also set in
    ``comparison``.  The matrix is therefore generally not symmetric.

    Args:
        df: DataFrame with one column per stream and boolean (or 0/1) values.

    Returns:
        A float DataFrame indexed and labelled by ``df.columns``.  A column
        whose total is zero has no defined overlap, so its column is NaN
        rather than triggering a ``ZeroDivisionError``.
    """
    presence = df.to_numpy(dtype=float)

    # shared[i, j] = sum over rows of col_i * col_j, computed in one matrix
    # product instead of a Python-level sum per pair of columns.
    shared = presence.T @ presence

    totals = presence.sum(axis=0)
    # Dividing by NaN yields NaN silently; dividing by 0 would not.
    totals[totals == 0] = float("nan")

    # Broadcasting divides every column j by totals[j] (the target's count).
    return pd.DataFrame(shared / totals, index=df.columns, columns=df.columns)
def save(df, htmlpath):
# Generate an HTML table for the given matrix (similarity or overlap)
html = df.to_html(float_format=lambda x: f"{x:.1%}")
@@ -83,14 +100,23 @@ def main():
print(
f"Found {len(event_numbers[stream])} events for {stream} stream.")
df = get_event_number_matrix(event_numbers)
ofile = fname_helper.jaccard_similarities_path(args.stream_config)
sim_matrix = calculate_similarity_matrix(event_numbers)
sim_matrix = calculate_similarity_matrix(df)
print(
f"Calculated similarity matrix. Printing and saving to html at {ofile}."
)
print(sim_matrix)
save(sim_matrix, ofile)
ofile = fname_helper.overlap_matrix_path(args.stream_config)
overlap_matrix = calculate_overlap_matrix(df)
print(
f"Calculated overlap matrix. Printing and saving to html at {ofile}.")
print(overlap_matrix)
save(overlap_matrix, ofile)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Loading