diff --git a/src/python/evaluation/common/pandas_util.py b/src/python/evaluation/common/pandas_util.py
index 6184335d..5aadf8ab 100644
--- a/src/python/evaluation/common/pandas_util.py
+++ b/src/python/evaluation/common/pandas_util.py
@@ -1,12 +1,12 @@
import json
import logging
from pathlib import Path
-from typing import Any, List, Set, Union
+from typing import Any, Iterable, List, Set, Union
import numpy as np
import pandas as pd
from src.python.evaluation.common.csv_util import write_dataframe_to_csv
-from src.python.evaluation.common.util import ColumnName, EvaluationArgument
+from src.python.evaluation.common.util import ColumnName
from src.python.evaluation.common.xlsx_util import create_workbook, remove_sheet, write_dataframe_to_xlsx_sheet
from src.python.evaluation.inspectors.common.statistics import PenaltyIssue
from src.python.review.application_config import LanguageVersion
@@ -18,15 +18,19 @@
def filter_df_by_language(df: pd.DataFrame, languages: Set[LanguageVersion],
column: str = ColumnName.LANG.value) -> pd.DataFrame:
- return df.loc[df[column].isin(set(map(lambda l: l.value, languages)))]
+ return filter_df_by_iterable_value(df, column, set(map(lambda l: l.value, languages)))
-def filter_df_by_condition(df: pd.DataFrame, column: str, value: Any) -> pd.DataFrame:
+def filter_df_by_iterable_value(df: pd.DataFrame, column: str, value: Iterable) -> pd.DataFrame:
+ return df.loc[df[column].isin(value)]
+
+
+def filter_df_by_single_value(df: pd.DataFrame, column: str, value: Any) -> pd.DataFrame:
return df.loc[df[column] == value]
def drop_duplicates(df: pd.DataFrame, column: str = ColumnName.CODE.value) -> pd.DataFrame:
- return df.drop_duplicates(column, keep='last')
+ return df.drop_duplicates(column, keep='last').reset_index(drop=True)
# Find all rows and columns where two dataframes are inconsistent.
@@ -100,4 +104,4 @@ def get_issues_from_json(str_json: str) -> List[PenaltyIssue]:
def get_issues_by_row(df: pd.DataFrame, row: int) -> List[PenaltyIssue]:
- return get_issues_from_json(df.iloc[row][EvaluationArgument.TRACEBACK.value])
+ return get_issues_from_json(df.iloc[row][ColumnName.TRACEBACK.value])
diff --git a/src/python/evaluation/common/util.py b/src/python/evaluation/common/util.py
index 836074b9..0e1a96eb 100644
--- a/src/python/evaluation/common/util.py
+++ b/src/python/evaluation/common/util.py
@@ -21,6 +21,7 @@ class ColumnName(Enum):
PENALTY = 'penalty'
USER = 'user'
HISTORY = 'history'
+ TRACEBACK = 'traceback'
@unique
diff --git a/src/python/evaluation/evaluation_run_tool.py b/src/python/evaluation/evaluation_run_tool.py
index 4a8d5029..78d59ce6 100644
--- a/src/python/evaluation/evaluation_run_tool.py
+++ b/src/python/evaluation/evaluation_run_tool.py
@@ -94,22 +94,22 @@ def __get_grade_from_traceback(traceback: str) -> str:
# TODO: calculate grade after it
def inspect_solutions_df(config: EvaluationConfig, lang_code_dataframe: pd.DataFrame) -> pd.DataFrame:
report = pd.DataFrame(columns=lang_code_dataframe.columns)
- report[EvaluationArgument.TRACEBACK.value] = []
+ report[ColumnName.TRACEBACK.value] = []
pandarallel.initialize()
if config.traceback:
- report[EvaluationArgument.TRACEBACK.value] = []
+ report[ColumnName.TRACEBACK.value] = []
try:
- lang_code_dataframe[EvaluationArgument.TRACEBACK.value] = lang_code_dataframe.parallel_apply(
+ lang_code_dataframe[ColumnName.TRACEBACK.value] = lang_code_dataframe.parallel_apply(
lambda row: __inspect_row(row[ColumnName.LANG.value],
row[ColumnName.CODE.value],
row[ColumnName.ID.value], config), axis=1)
lang_code_dataframe[ColumnName.GRADE.value] = lang_code_dataframe.parallel_apply(
- lambda row: __get_grade_from_traceback(row[EvaluationArgument.TRACEBACK.value]), axis=1)
+ lambda row: __get_grade_from_traceback(row[ColumnName.TRACEBACK.value]), axis=1)
if not config.traceback:
- del lang_code_dataframe[EvaluationArgument.TRACEBACK.value]
+ del lang_code_dataframe[ColumnName.TRACEBACK.value]
return lang_code_dataframe
except ValueError as e:
diff --git a/src/python/evaluation/inspectors/diffs_between_df.py b/src/python/evaluation/inspectors/diffs_between_df.py
index 04269773..5556484f 100644
--- a/src/python/evaluation/inspectors/diffs_between_df.py
+++ b/src/python/evaluation/inspectors/diffs_between_df.py
@@ -6,7 +6,7 @@
from src.python.evaluation.common.pandas_util import (
get_inconsistent_positions, get_issues_by_row, get_solutions_df, get_solutions_df_by_file_path,
)
-from src.python.evaluation.common.util import ColumnName, EvaluationArgument
+from src.python.evaluation.common.util import ColumnName
from src.python.review.common.file_system import (
Extension, get_parent_folder, get_restricted_extension, serialize_data_and_write_to_file,
)
@@ -52,7 +52,7 @@ def find_diffs(old_df: pd.DataFrame, new_df: pd.DataFrame) -> dict:
diffs = {
ColumnName.GRADE.value: [],
ColumnName.DECREASED_GRADE.value: [],
- EvaluationArgument.TRACEBACK.value: {},
+ ColumnName.TRACEBACK.value: {},
ColumnName.PENALTY.value: {},
}
if ColumnName.USER.value in new_df.columns:
@@ -60,7 +60,7 @@ def find_diffs(old_df: pd.DataFrame, new_df: pd.DataFrame) -> dict:
else:
diffs[ColumnName.USER.value] = 0
# Keep only diffs in the TRACEBACK column
- for row, _ in filter(lambda t: t[1] == EvaluationArgument.TRACEBACK.value, inconsistent_positions.index):
+ for row, _ in filter(lambda t: t[1] == ColumnName.TRACEBACK.value, inconsistent_positions.index):
old_value = old_df.iloc[row][ColumnName.GRADE.value]
new_value = new_df.iloc[row][ColumnName.GRADE.value]
old_quality = QualityType(old_value).to_number()
@@ -79,7 +79,7 @@ def find_diffs(old_df: pd.DataFrame, new_df: pd.DataFrame) -> dict:
raise ValueError(f'New dataframe contains less issues than old for fragment {id}')
difference = set(set(new_issues) - set(old_issues))
if len(difference) > 0:
- diffs[EvaluationArgument.TRACEBACK.value][fragment_id] = difference
+ diffs[ColumnName.TRACEBACK.value][fragment_id] = difference
# Find issues with influence_in_penalty > 0
penalty = set(filter(lambda i: i.influence_on_penalty > 0, new_issues))
diff --git a/src/python/evaluation/inspectors/distribute_grades.py b/src/python/evaluation/inspectors/distribute_grades.py
index e9d3e3ad..0518b9a1 100644
--- a/src/python/evaluation/inspectors/distribute_grades.py
+++ b/src/python/evaluation/inspectors/distribute_grades.py
@@ -5,7 +5,7 @@
import pandas as pd
from src.python.common.tool_arguments import RunToolArgument
from src.python.evaluation.common.pandas_util import get_solutions_df, get_solutions_df_by_file_path, write_df_to_file
-from src.python.evaluation.common.util import ColumnName, EvaluationArgument
+from src.python.evaluation.common.util import ColumnName
from src.python.review.common.file_system import Extension, get_parent_folder, get_restricted_extension
CodeToGradesDict = Dict[str, Tuple[str, Optional[str]]]
@@ -35,12 +35,12 @@ def get_code_to_grades_dict(df: pd.DataFrame) -> CodeToGradesDict:
df.apply(lambda row: __add_grade(code_to_grades_dict,
row[ColumnName.CODE.value],
row[ColumnName.GRADE.value],
- row[EvaluationArgument.TRACEBACK.value]), axis=1)
+ row[ColumnName.TRACEBACK.value]), axis=1)
return code_to_grades_dict
def fill_all_solutions_df(all_solutions_df: pd.DataFrame, code_to_grades_dict: CodeToGradesDict) -> pd.DataFrame:
- all_solutions_df[ColumnName.GRADE.value], all_solutions_df[EvaluationArgument.TRACEBACK.value] = zip(
+ all_solutions_df[ColumnName.GRADE.value], all_solutions_df[ColumnName.TRACEBACK.value] = zip(
*all_solutions_df[ColumnName.CODE.value].map(lambda code: code_to_grades_dict[code]))
return all_solutions_df
diff --git a/src/python/evaluation/inspectors/filter_issues.py b/src/python/evaluation/inspectors/filter_issues.py
index e0d7d86b..60276f20 100644
--- a/src/python/evaluation/inspectors/filter_issues.py
+++ b/src/python/evaluation/inspectors/filter_issues.py
@@ -5,13 +5,13 @@
import pandas as pd
from src.python.common.tool_arguments import RunToolArgument
from src.python.evaluation.common.pandas_util import get_issues_from_json, get_solutions_df_by_file_path
-from src.python.evaluation.common.util import ColumnName, EvaluationArgument, parse_set_arg
+from src.python.evaluation.common.util import ColumnName, parse_set_arg
from src.python.evaluation.inspectors.common.statistics import PenaltyIssue
from src.python.review.common.file_system import Extension, get_parent_folder, serialize_data_and_write_to_file
from src.python.review.inspectors.issue import BaseIssue
-TRACEBACK = EvaluationArgument.TRACEBACK.value
+TRACEBACK = ColumnName.TRACEBACK.value
ID = ColumnName.ID.value
GRADE = ColumnName.GRADE.value
diff --git a/src/python/evaluation/inspectors/get_worse_public_examples.py b/src/python/evaluation/inspectors/get_worse_public_examples.py
index 4d018c3b..980c8a9e 100644
--- a/src/python/evaluation/inspectors/get_worse_public_examples.py
+++ b/src/python/evaluation/inspectors/get_worse_public_examples.py
@@ -5,8 +5,8 @@
import pandas as pd
from src.python.common.tool_arguments import RunToolArgument
from src.python.evaluation.common.csv_util import write_dataframe_to_csv
-from src.python.evaluation.common.pandas_util import filter_df_by_condition, get_solutions_df_by_file_path
-from src.python.evaluation.common.util import ColumnName, EvaluationArgument
+from src.python.evaluation.common.pandas_util import filter_df_by_single_value, get_solutions_df_by_file_path
+from src.python.evaluation.common.util import ColumnName
from src.python.evaluation.inspectors.common.statistics import PenaltyIssue
from src.python.review.common.file_system import deserialize_data_from_file, Extension, get_parent_folder
@@ -32,12 +32,12 @@ def __get_new_inspections(fragment_id_to_issues: Dict[int, List[PenaltyIssue]],
def __get_public_fragments(solutions_df: pd.DataFrame, diffs_dict: dict) -> pd.DataFrame:
# Keep only public solutions
- public_fragments = filter_df_by_condition(solutions_df, ColumnName.IS_PUBLIC.value, 'YES')
+ public_fragments = filter_df_by_single_value(solutions_df, ColumnName.IS_PUBLIC.value, 'YES')
count_inspections_column = 'count_inspections'
new_inspections_column = 'new_inspections'
# Get only new inspections and count them
- fragment_id_to_issues = diffs_dict[EvaluationArgument.TRACEBACK.value]
+ fragment_id_to_issues = diffs_dict[ColumnName.TRACEBACK.value]
public_fragments[new_inspections_column] = public_fragments.apply(
lambda row: __get_new_inspections(fragment_id_to_issues, row[ColumnName.ID.value]), axis=1)
public_fragments[count_inspections_column] = public_fragments.apply(
@@ -45,7 +45,7 @@ def __get_public_fragments(solutions_df: pd.DataFrame, diffs_dict: dict) -> pd.D
public_fragments = public_fragments.sort_values(count_inspections_column, ascending=False)
# Keep only public columns
- return public_fragments[[ColumnName.CODE.value, EvaluationArgument.TRACEBACK.value, new_inspections_column]]
+ return public_fragments[[ColumnName.CODE.value, ColumnName.TRACEBACK.value, new_inspections_column]]
# TODO: add readme
diff --git a/src/python/evaluation/inspectors/print_inspectors_statistics.py b/src/python/evaluation/inspectors/print_inspectors_statistics.py
index e072027c..e3146cd6 100644
--- a/src/python/evaluation/inspectors/print_inspectors_statistics.py
+++ b/src/python/evaluation/inspectors/print_inspectors_statistics.py
@@ -4,7 +4,7 @@
from typing import Dict, List
from src.python.common.tool_arguments import RunToolArgument
-from src.python.evaluation.common.util import ColumnName, EvaluationArgument
+from src.python.evaluation.common.util import ColumnName
from src.python.evaluation.inspectors.common.statistics import (
GeneralInspectorsStatistics, IssuesStatistics, PenaltyInfluenceStatistics, PenaltyIssue,
)
@@ -32,11 +32,11 @@ def configure_arguments(parser: argparse.ArgumentParser) -> None:
def has_incorrect_grades(diffs_dict: dict) -> bool:
- return len(diffs_dict[ColumnName.GRADE.value]) > 0
+ return len(diffs_dict.get(ColumnName.GRADE.value, [])) > 0
def has_decreased_grades(diffs_dict: dict) -> bool:
- return len(diffs_dict[ColumnName.DECREASED_GRADE.value]) > 0
+ return len(diffs_dict.get(ColumnName.DECREASED_GRADE.value, [])) > 0
def __gather_issues_stat(issues_stat_dict: Dict[int, List[PenaltyIssue]]) -> IssuesStatistics:
@@ -50,10 +50,10 @@ def __gather_issues_stat(issues_stat_dict: Dict[int, List[PenaltyIssue]]) -> Iss
def gather_statistics(diffs_dict: dict) -> GeneralInspectorsStatistics:
- new_issues_stat = __gather_issues_stat(diffs_dict[EvaluationArgument.TRACEBACK.value])
- penalty_issues_stat = __gather_issues_stat(diffs_dict[ColumnName.PENALTY.value])
+ new_issues_stat = __gather_issues_stat(diffs_dict.get(ColumnName.TRACEBACK.value, {}))
+ penalty_issues_stat = __gather_issues_stat(diffs_dict.get(ColumnName.PENALTY.value, {}))
return GeneralInspectorsStatistics(new_issues_stat, penalty_issues_stat,
- PenaltyInfluenceStatistics(diffs_dict[ColumnName.PENALTY.value]))
+ PenaltyInfluenceStatistics(diffs_dict.get(ColumnName.PENALTY.value, {})))
def main() -> None:
@@ -73,7 +73,7 @@ def main() -> None:
print('All grades are equal.')
else:
print(f'Decreased grades was found in {len(diffs[ColumnName.DECREASED_GRADE.value])} fragments')
- print(f'{diffs[ColumnName.USER.value]} unique users was found!')
+ print(f'{diffs.get(ColumnName.USER.value, 0)} unique users were found!')
print(separator)
statistics = gather_statistics(diffs)
diff --git a/src/python/evaluation/qodana/README.md b/src/python/evaluation/qodana/README.md
index b42ae860..dcd73e45 100644
--- a/src/python/evaluation/qodana/README.md
+++ b/src/python/evaluation/qodana/README.md
@@ -1,4 +1,4 @@
-# Dataset label
+# Dataset labelling
This script allows you to label a dataset using the found [Qodana](https://github.com/JetBrains/Qodana) inspections.
The dataset must contain at least three columns: `id`, `code` and `lang`, where `id` is a unique solution number, `lang` is the language in which the code is written in the `code` column. The `lang` must belong to one of the following values: `java7`, `java8`, `java9`, `java11`, `python3`, `kotlin`. If `lang` is not equal to any of the values, the row will be skipped.
@@ -22,7 +22,7 @@ Run the [dataset_labeling.py](dataset_labeling.py) with the arguments from comma
---
-# Postprocessing
+# Preprocessing
The model that imitates Qodana analysis gets input from a dataset in a special format.
This module allows preparing datasets that were graded by [dataset_marking.py](dataset_marking.py) script.
@@ -231,3 +231,81 @@ id | code | lang | inspections
1 | "// second line from code with id 1" | java11 | 0
```
+
+# Postprocessing
+
+At this stage, you can convert the data produced by Qodana into the format of the Hyperstyle tool for
+analysis and statistics gathering.
+
+## Convert Qodana inspections into Hyperstyle inspections
+
+This stage allows you to convert the `inspections` column from a `csv` file labelled by Qodana into
+a `traceback` column in the Hyperstyle tool format.
+
+This stage includes:
+- keeping only unique code fragments in both datasets (Qodana and Hyperstyle);
+- keeping only fragments that appear in both datasets with the same ids and the same code;
+- adding a `grade` column to the Qodana dataset corresponding to the `grade` column from the Hyperstyle dataset;
+- adding a `traceback` column in the Hyperstyle format to the Qodana dataset, built from the `inspections` column.
+
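+The list above corresponds roughly to the following pandas operations. This is a minimal sketch, assuming the
+`id`, `code`, and `grade` columns described above; the actual logic lives in
+[convert_to_hyperstyle_inspections.py](convert_to_hyperstyle_inspections.py):
+
+```python
+from typing import Tuple
+
+import pandas as pd
+
+
+def align_datasets(qodana_df: pd.DataFrame, hyperstyle_df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
+    # Keep only the last occurrence of every duplicated code fragment
+    qodana_df = qodana_df.drop_duplicates('code', keep='last')
+    hyperstyle_df = hyperstyle_df.drop_duplicates('code', keep='last')
+    # Keep only fragments whose ids are present in both datasets
+    qodana_df = qodana_df[qodana_df['id'].isin(hyperstyle_df['id'])]
+    hyperstyle_df = hyperstyle_df[hyperstyle_df['id'].isin(qodana_df['id'])]
+    # Sort both datasets by id so that rows with the same id line up
+    qodana_df = qodana_df.sort_values('id').reset_index(drop=True)
+    hyperstyle_df = hyperstyle_df.sort_values('id').reset_index(drop=True)
+    # Copy the grade column from the Hyperstyle dataset into the Qodana dataset
+    qodana_df['grade'] = hyperstyle_df['grade']
+    return qodana_df, hyperstyle_df
+```
+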
+Please note that your Qodana input file must be labelled by the [dataset_labeling.py](dataset_labeling.py) script
+and have an `inspections` column. Your Hyperstyle input file must be graded by the [evaluation_run_tool.py](../evaluation_run_tool.py) script
+and have `traceback` and `grade` columns.
+
+The output is two new `csv` files.
+
+#### Usage
+
+Run the [convert_to_hyperstyle_inspections.py](convert_to_hyperstyle_inspections.py) script with the arguments from the command line.
+
+Required arguments:
+
+- `solutions_file_path_hyperstyle` — path to a `csv` file labelled by Hyperstyle;
+- `solutions_file_path_qodana` — path to a `csv` file labelled by Qodana.
+
+Optional arguments:
+
+Argument | Description
+--- | ---
+**‑i**, **‑‑issues-to-keep** | Set of issue ids to keep in the dataset, separated by commas. By default, all issues are deleted.
+
+The resulting Hyperstyle file will be stored in the same folder as `solutions_file_path_hyperstyle`.
+The resulting Qodana file will be stored in the same folder as `solutions_file_path_qodana`.
+
+An example of the Qodana inspections before and after this processing:
+
+1. Before:
+
+```json
+{
+ "issues": [
+ {
+ "fragment_id": 0,
+ "line": 8,
+ "offset": 8,
+ "length": 10,
+ "highlighted_element": "System.out",
+ "description": "Uses of System.out
should probably be replaced with more robust logging #loc",
+ "problem_id": "SystemOutErr"
+ }
+ ]
+}
+```
+
+2. After:
+
+```json
+{
+ "issues": [
+ {
+ "code": "SystemOutErr",
+ "text": "Uses of System.out
should probably be replaced with more robust logging #loc",
+ "line": "",
+ "line_number": 8,
+ "column_number": 8,
+ "category": "INFO",
+ "influence_on_penalty": 0
+ }
+ ]
+}
+```
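+
+The field mapping behind this example, as a minimal sketch (the empty `line` text and the `category` fallback are
+assumptions here: the source line is not known at this stage, and issues with an unknown problem id are treated as `INFO`):
+
+```python
+from typing import Any, Dict
+
+
+def qodana_issue_to_hyperstyle_issue(issue: Dict[str, Any]) -> Dict[str, Any]:
+    """Map one Qodana issue (the 'Before' format) to one Hyperstyle issue (the 'After' format)."""
+    return {
+        'code': issue['problem_id'],
+        'text': issue['description'],
+        'line': '',  # the source line text is not available at this stage
+        'line_number': issue['line'],
+        'column_number': issue['offset'],
+        'category': 'INFO',  # issues with an unknown problem id fall back to INFO
+        'influence_on_penalty': 0,
+    }
+```
+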
+___
diff --git a/src/python/evaluation/qodana/convert_to_hyperstyle_inspections.py b/src/python/evaluation/qodana/convert_to_hyperstyle_inspections.py
new file mode 100644
index 00000000..5d7530a3
--- /dev/null
+++ b/src/python/evaluation/qodana/convert_to_hyperstyle_inspections.py
@@ -0,0 +1,123 @@
+import argparse
+import json
+from pathlib import Path
+from typing import Iterable, Set
+
+import pandas as pd
+from src.python.common.tool_arguments import RunToolArgument
+from src.python.evaluation.common.pandas_util import (
+ drop_duplicates, filter_df_by_iterable_value, get_solutions_df_by_file_path, write_df_to_file,
+)
+from src.python.evaluation.common.util import ColumnName, parse_set_arg
+from src.python.evaluation.qodana.util.issue_types import QODANA_CLASS_NAME_TO_ISSUE_TYPE
+from src.python.evaluation.qodana.util.models import QodanaColumnName, QodanaIssue
+from src.python.review.common.file_system import Extension, get_parent_folder
+from src.python.review.inspectors.inspector_type import InspectorType
+from src.python.review.inspectors.issue import BaseIssue, IssueType
+from src.python.review.reviewers.utils.print_review import convert_issue_to_json
+
+
+def configure_arguments(parser: argparse.ArgumentParser) -> None:
+ parser.add_argument(f'{RunToolArgument.SOLUTIONS_FILE_PATH.value.long_name}_hyperstyle',
+ type=lambda value: Path(value).absolute(),
+ help=f'{RunToolArgument.SOLUTIONS_FILE_PATH.value.description}'
+ f'\nAll code fragments from this file must be graded by the hyperstyle tool '
+ f'(file contains a traceback column)')
+
+ parser.add_argument(f'{RunToolArgument.SOLUTIONS_FILE_PATH.value.long_name}_qodana',
+ type=lambda value: Path(value).absolute(),
+ help=f'{RunToolArgument.SOLUTIONS_FILE_PATH.value.description}'
+ f'\nAll code fragments from this file must be graded by qodana '
+ f'(file contains an inspections column)')
+
+ parser.add_argument('-i', '--issues-to-keep',
+ help='Set of issues to keep',
+ default='')
+
+
+# Drop duplicates in the CODE column and keep only rows whose ids are in ids_to_filter
+# The new dataframe is sorted by the ID column and indexed by it
+def __preprocess_df(df: pd.DataFrame, ids_to_filter: Iterable) -> pd.DataFrame:
+ df = drop_duplicates(df)
+ df = filter_df_by_iterable_value(df, ColumnName.ID.value, ids_to_filter)
+ return df.sort_values(ColumnName.ID.value).set_index(ColumnName.ID.value, drop=False)
+
+
+# Check if all code fragments with the same ids are equal
+def __check_code_by_ids(qodana_df: pd.DataFrame, hyperstyle_df: pd.DataFrame) -> None:
+ assert qodana_df.shape[0] == hyperstyle_df.shape[0], (
+ f'rows count {qodana_df.shape[0]} in the qodana df does not equal rows '
+ f'count {hyperstyle_df.shape[0]} in the hyperstyle df'
+ )
+ for i in range(0, qodana_df.shape[0]):
+ if qodana_df.iloc[i][ColumnName.CODE.value] != hyperstyle_df.iloc[i][ColumnName.CODE.value]:
+ raise ValueError(f'Code fragments in the {i}th row are not equal!')
+
+
+# Convert qodana inspections output to hyperstyle output
+# Note: the result contains only the issues field of the hyperstyle json output
+def __qodana_to_hyperstyle_output(qodana_output: str, issues_to_keep: Set[str]) -> str:
+ qodana_issues = QodanaIssue.parse_list_issues_from_json(qodana_output)
+ filtered_issues = filter(lambda issue: issue.problem_id in issues_to_keep, qodana_issues)
+ hyperstyle_issues = map(lambda issue:
+ BaseIssue(origin_class=issue.problem_id,
+ type=QODANA_CLASS_NAME_TO_ISSUE_TYPE.get(issue.problem_id, IssueType.INFO),
+ description=issue.description,
+ file_path=Path(),
+ line_no=issue.line,
+ column_no=issue.offset,
+ inspector_type=InspectorType.QODANA),
+ filtered_issues)
+ hyperstyle_json = {'issues': list(map(lambda issue: convert_issue_to_json(issue), hyperstyle_issues))}
+
+ return json.dumps(hyperstyle_json)
+
+
+# Filter and sort the rows of the qodana dataframe according to the hyperstyle dataframe
+# Add a traceback column with the hyperstyle output (converted from the qodana output)
+# Add a grade column with grades from the hyperstyle dataframe (to gather statistics with the diffs_between_df.py script)
+def __prepare_qodana_df(qodana_df: pd.DataFrame, hyperstyle_df: pd.DataFrame,
+ issues_to_keep: Set[str]) -> pd.DataFrame:
+ qodana_df = __preprocess_df(qodana_df, hyperstyle_df[ColumnName.ID.value])
+ __check_code_by_ids(qodana_df, hyperstyle_df)
+
+ qodana_df[ColumnName.TRACEBACK.value] = qodana_df.apply(
+ lambda row: __qodana_to_hyperstyle_output(row[QodanaColumnName.INSPECTIONS.value], issues_to_keep), axis=1)
+
+ qodana_df[ColumnName.GRADE.value] = hyperstyle_df[ColumnName.GRADE.value]
+ return qodana_df
+
+
+def __write_updated_df(old_df_path: Path, df: pd.DataFrame, name_prefix: str) -> None:
+ output_path = get_parent_folder(Path(old_df_path))
+ write_df_to_file(df, output_path / f'{name_prefix}_updated{Extension.CSV.value}', Extension.CSV)
+
+
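+# Sort rows by the code column and replace the id column with the original dataframe index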
+def __reassign_ids(df: pd.DataFrame) -> pd.DataFrame:
+ df = df.sort_values(ColumnName.CODE.value)
+ df[ColumnName.ID.value] = df.index
+ return df
+
+
+def main() -> None:
+ parser = argparse.ArgumentParser()
+ configure_arguments(parser)
+ args = parser.parse_args()
+
+ issues_to_keep = parse_set_arg(args.issues_to_keep)
+
+ qodana_solutions_file_path = args.solutions_file_path_qodana
+ qodana_solutions_df = __reassign_ids(get_solutions_df_by_file_path(qodana_solutions_file_path))
+
+ hyperstyle_solutions_file_path = args.solutions_file_path_hyperstyle
+ hyperstyle_solutions_df = __reassign_ids(get_solutions_df_by_file_path(hyperstyle_solutions_file_path))
+ hyperstyle_solutions_df = __preprocess_df(hyperstyle_solutions_df, qodana_solutions_df[ColumnName.ID.value])
+
+ qodana_solutions_df = __prepare_qodana_df(qodana_solutions_df, hyperstyle_solutions_df, issues_to_keep)
+
+ __write_updated_df(qodana_solutions_file_path, qodana_solutions_df, 'qodana')
+ __write_updated_df(hyperstyle_solutions_file_path, hyperstyle_solutions_df, 'hyperstyle')
+
+
+if __name__ == '__main__':
+ main()
diff --git a/src/python/evaluation/qodana/util/issue_types.py b/src/python/evaluation/qodana/util/issue_types.py
new file mode 100644
index 00000000..aa495e43
--- /dev/null
+++ b/src/python/evaluation/qodana/util/issue_types.py
@@ -0,0 +1,6 @@
+from typing import Dict
+
+from src.python.review.inspectors.issue import IssueType
+
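+# Maps Qodana problem ids to Hyperstyle issue types
+# Problem ids that are missing here fall back to IssueType.INFO in convert_to_hyperstyle_inspections.py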
+QODANA_CLASS_NAME_TO_ISSUE_TYPE: Dict[str, IssueType] = {
+}
diff --git a/src/python/review/inspectors/inspector_type.py b/src/python/review/inspectors/inspector_type.py
index 2d00c0d5..e9bfe430 100644
--- a/src/python/review/inspectors/inspector_type.py
+++ b/src/python/review/inspectors/inspector_type.py
@@ -24,6 +24,7 @@ class InspectorType(Enum):
ESLINT = 'ESLINT'
UNDEFINED = 'UNDEFINED'
+ QODANA = 'QODANA'
@classmethod
def available_values(cls) -> List[str]:
diff --git a/src/python/review/reviewers/utils/print_review.py b/src/python/review/reviewers/utils/print_review.py
index 4facf3fb..617eb548 100644
--- a/src/python/review/reviewers/utils/print_review.py
+++ b/src/python/review/reviewers/utils/print_review.py
@@ -121,7 +121,7 @@ class IssueJsonFields(Enum):
INFLUENCE_ON_PENALTY = 'influence_on_penalty'
-def convert_issue_to_json(issue: BaseIssue, influence_on_penalty: int) -> Dict[str, Any]:
+def convert_issue_to_json(issue: BaseIssue, influence_on_penalty: int = 0) -> Dict[str, Any]:
line_text = get_file_line(issue.file_path, issue.line_no)
return {
diff --git a/test/python/evaluation/inspectors/diffs_between_df/test_diifs_between_df.py b/test/python/evaluation/inspectors/diffs_between_df/test_diifs_between_df.py
index 8164134d..b794b0a4 100644
--- a/test/python/evaluation/inspectors/diffs_between_df/test_diifs_between_df.py
+++ b/test/python/evaluation/inspectors/diffs_between_df/test_diifs_between_df.py
@@ -3,7 +3,7 @@
import pytest
from src.python.evaluation.common.pandas_util import get_solutions_df_by_file_path
-from src.python.evaluation.common.util import ColumnName, EvaluationArgument
+from src.python.evaluation.common.util import ColumnName
from src.python.evaluation.inspectors.common.statistics import PenaltyIssue
from src.python.evaluation.inspectors.diffs_between_df import find_diffs
from src.python.review.inspectors.inspector_type import InspectorType
@@ -15,7 +15,7 @@
ColumnName.GRADE.value: [],
ColumnName.DECREASED_GRADE.value: [],
ColumnName.USER.value: 0,
- EvaluationArgument.TRACEBACK.value: {},
+ ColumnName.TRACEBACK.value: {},
ColumnName.PENALTY.value: {},
}
@@ -23,7 +23,7 @@
ColumnName.GRADE.value: [1, 2],
ColumnName.DECREASED_GRADE.value: [],
ColumnName.USER.value: 0,
- EvaluationArgument.TRACEBACK.value: {},
+ ColumnName.TRACEBACK.value: {},
ColumnName.PENALTY.value: {},
}
@@ -55,7 +55,7 @@
ColumnName.GRADE.value: [],
ColumnName.DECREASED_GRADE.value: [],
ColumnName.USER.value: 0,
- EvaluationArgument.TRACEBACK.value: {
+ ColumnName.TRACEBACK.value: {
1: ISSUES,
},
ColumnName.PENALTY.value: {},
@@ -65,7 +65,7 @@
ColumnName.GRADE.value: [2, 3],
ColumnName.DECREASED_GRADE.value: [],
ColumnName.USER.value: 0,
- EvaluationArgument.TRACEBACK.value: {
+ ColumnName.TRACEBACK.value: {
1: ISSUES,
},
ColumnName.PENALTY.value: {},
@@ -75,7 +75,7 @@
ColumnName.GRADE.value: [],
ColumnName.DECREASED_GRADE.value: [2, 3],
ColumnName.USER.value: 0,
- EvaluationArgument.TRACEBACK.value: {},
+ ColumnName.TRACEBACK.value: {},
ColumnName.PENALTY.value: {},
}
diff --git a/whitelist.txt b/whitelist.txt
index e253cce3..6063b302 100644
--- a/whitelist.txt
+++ b/whitelist.txt
@@ -124,7 +124,6 @@ WEBP
SVG
EPS
xaxis
-
preprocessing
num
dataloader
@@ -151,6 +150,7 @@ QodanaDataset
cuda
f1
WANDB
+preprocess
PNG
consts
Measurer