diff --git a/characterization/characterization.py b/characterization/characterization.py index 5281c01d7eaa1cb36497f74537339f962666ec64..fb397cc35c6a0f11ab6843dda2856fc53a64a343 100644 --- a/characterization/characterization.py +++ b/characterization/characterization.py @@ -1,8 +1,11 @@ import pandas as pd import seaborn as sns import matplotlib.pyplot as plt +import matplotlib +matplotlib.rcParams['pdf.fonttype'] = 42 + df = pd.read_csv('token_lengths_all.csv') print(df.head()) @@ -18,11 +21,10 @@ df = df[(df['output_token_length'] < 2048)] df = df.replace('stablelm-tuned-alpha-7b', 'stablelm-alpha-7b') fig, ax = plt.subplots(figsize=(6.4, 3.6)) -sns.boxplot(data=df, x="model", y="output_token_length", fliersize=3, showfliers=False, whis=1.5) +sns.boxplot(data=df, x="model", y="output_token_length", fliersize=3, showfliers=False, whis=1.5, palette="Spectral") plt.xticks(rotation=90) plt.xlabel('') plt.ylabel('Output Token Length') -# sns.color_palette("Spectral", as_cmap=True) plt.grid(True, axis='y', zorder=-1, linestyle='dashed', color='gray', alpha=0.5) plt.tight_layout() # plt.show() diff --git a/characterization/token_length_boxplots_all.pdf b/characterization/token_length_boxplots_all.pdf index 705809e5c77d11f02dff25a1b1f0fbf764c7b915..d13c1cfe2d6ac1ebdb33bca52c4aa29c3dd363d6 100644 Binary files a/characterization/token_length_boxplots_all.pdf and b/characterization/token_length_boxplots_all.pdf differ