| { |
| "cells": [ |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "import pandas as pd\n", |
| "import numpy as np\n", |
| "from pprint import pprint\n", |
| "from collections import Counter\n", |
| "import common\n", |
| "import math" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "commit_list_df = pd.read_csv(\"results/classifier/commitlist.csv\")\n", |
| "mean_authors=commit_list_df.query(\"category == 'Uncategorized' & topic != 'not user facing'\").author.to_list()\n", |
| "counts = Counter(mean_authors)\n", |
| "commit_list_df.head()" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "commit_list_df.category.describe()" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# The number un categorized and no topic commits\n", |
| "no_category = commit_list_df.query(\"category == 'Uncategorized' & topic != 'not user facing'\")\n", |
| "print(len(no_category))" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# check for cherry-picked commits\n", |
| "example_sha = '55c76baf579cb6593f87d1a23e9a49afeb55f15a'\n", |
| "commit_hashes = set(commit_list_df.commit_hash.to_list())\n", |
| "\n", |
| "example_sha[:11] in commit_hashes" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Get the difference between known categories and categories from commits\n", |
| "\n", |
| "diff_categories = set(commit_list_df.category.to_list()) - set(common.categories)\n", |
| "print(len(diff_categories))\n", |
| "pprint(diff_categories)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Counts of categories\n" |
| ] |
| } |
| ], |
| "metadata": { |
| "kernelspec": { |
| "display_name": "Python 3", |
| "language": "python", |
| "name": "python3" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 3 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython3" |
| }, |
| "vscode": { |
| "interpreter": { |
| "hash": "a867c59af434d7534e61ccb37014830daefd5fcd3816cab68d595dde5e446f52" |
| } |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 2 |
| } |