Changes

On November 28, 2024 at 1:16:49 PM UTC, admin:

- Changed value of field extra_authors to [{'extra_author': 'Iser, Markus', 'familyName': 'Iser', 'givenName': 'Markus', 'orcid': ''}] in Experimental data for the paper "a comprehensive study of k-portfolios of recent sat solvers"

In the record diff below, lines added by this change are prefixed with "+" and the removed line with "-"; all other lines are unchanged context.
{
    "author": "Bach, Jakob",
    "author_email": "",
+   "citation": [],
    "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700",
    "doi": "10.35097/1331",
    "doi_date_published": "2023",
    "doi_publisher": "",
    "doi_status": "True",
    "extra_authors": [
        {
            "extra_author": "Iser, Markus",
+           "familyName": "Iser",
+           "givenName": "Markus",
            "orcid": ""
        }
    ],
+   "familyName": "Bach",
+   "givenName": "Jakob",
    "groups": [],
    "id": "6079d941-ec81-4518-9d15-da5c777d4790",
    "isopen": false,
    "license_id": "CC BY 4.0 Attribution",
    "license_title": "CC BY 4.0 Attribution",
    "metadata_created": "2023-08-04T08:50:33.477137",
-   "metadata_modified": "2023-08-04T09:29:05.113312",
+   "metadata_modified": "2024-11-28T13:16:49.400943",
    "name": "rdr-doi-10-35097-1331",
    "notes": "Abstract: These are the experimental data for the
        paper\r\n\r\n> Bach, Jakob, Markus Iser, and Klemens B\u00f6hm. \"A
        Comprehensive Study of k-Portfolios of Recent SAT
        Solvers\"\r\n\r\npublished at the conference [*SAT
        2022*](http://satisfiability.org/SAT22/).\r\nYou can find the paper
        [here](https://www.doi.org/10.4230/LIPIcs.SAT.2022.2) and the code
        [here](https://github.com/Jakob-Bach/Small-Portfolios).\r\nSee the
        `README` for details.\r\nTechnicalRemarks: # Experimental Data for the
        Paper \"A Comprehensive Study of k-Portfolios of Recent SAT
        Solvers\"\r\n\r\nThese are the experimental data for the
        paper\r\n\r\n> Bach, Jakob, Markus Iser, and Klemens B\u00f6hm. \"A
        Comprehensive Study of k-Portfolios of Recent SAT
        Solvers\"\r\n\r\naccepted at the conference [*SAT
        2022*](http://satisfiability.org/SAT22/).\r\nCheck our [GitHub
        repository](https://github.com/Jakob-Bach/Small-Portfolios) for the
        code and instructions to reproduce the experiments.\r\nThe data were
        obtained on a server with an `AMD EPYC 7551`
        [CPU](https://www.amd.com/en/products/cpu/amd-epyc-7551) (32 physical
        cores, base clock of 2.0 GHz) and 128 GB RAM.\r\nThe Python version
        was `3.8.10`.\r\nWith this configuration, the experimental pipeline
        (`run_experiments.py`) took about 25 h.\r\n\r\nThe commit hash for the
        last run of the experimental pipeline (`run_experiments.py`) is
        [d402353e7f5804d3b693c3057f203a2c719c4098](https://github.com/Jakob-Bach/Small-Portfolios/tree/d402353e7f5804d3b693c3057f203a2c719c4098).\r\nThe
        commit hash for the last run of the evaluation pipeline
        (`run_evaluation.py`) is
        [5ba0468088fc0ce0ce3d2f04229946c1df83fa9d](https://github.com/Jakob-Bach/Small-Portfolios/tree/5ba0468088fc0ce0ce3d2f04229946c1df83fa9d).\r\nBoth
        commits are also tagged.\r\n\r\nIn the following, we
        describe:\r\n\r\n- how to use the experimental data\r\n- the
        structure/content of each data file\r\n\r\n## Using the
        Data\r\n\r\nMost of the experimental pipeline's input and output files
        are plain CSVs.\r\nYou can easily read in any of the CSVs with
        `pandas` if you are using Python:\r\n\r\n```python\r\nimport pandas as
        pd\r\n\r\nresults =
        pd.read_csv('<path>/search_results.csv')\r\n```\r\n\r\nAll CSVs were
        written with `<DataFrame>.to_csv(<path>, index=False)`, i.e., they
        follow `pandas`' defaults for CSVs,\r\nso it is unnecessary to pass
        further parameters when reading them in with `pandas`.\r\nThe CSVs
        mostly contain plain numbers and strings;\r\nthe latter are only
        quoted if necessary, e.g., if they contain commas.\r\nHowever, the
        column `solvers` in `search_results.csv` contains lists of solver
        names.\r\nYou can convert this column from string to proper Python
        lists as follows:\r\n\r\n```python\r\nimport
        ast\r\n\r\nsearch_results['solvers'] =
        search_results['solvers'].apply(ast.literal_eval)\r\n```\r\n\r\n##
        `*.db` files and corresponding `*.csv` files\r\n\r\nRaw
        instance-feature databases and solver-runtime databases from
        [GBD](https://gbd.iti.kit.edu),\r\nas well as CSV exports of
        them.\r\nOutputs of the script `prepare_dataset.py`.\r\n\r\n-
        `meta`:\r\n Meta-data of SAT instances, e.g., in which competition(s)
        the instances were used.\r\n We only use this information to filter
        instances for the experimental datasets but not as features for
        predictions.\r\n- `satzilla`:\r\n Instance features obtained with the
        feature extractor of [*SATzilla
        2012*](https://www.cs.ubc.ca/labs/algorithms/Projects/SATzilla/Report_SAT_features.pdf).\r\n
        Numeric matrix, apart from the `hash` column that identifies
        instances.\r\n Each row represents a SAT instance; each column
        represents a feature (column names are feature names).\r\n The value
        `timeout` represents missing values (feature extractor exceeded time
        or memory limits).\r\n- `sc2020`:\r\n Solver runtimes from the [*SAT
        Competition 2020*](https://satcompetition.github.io/2020/).\r\n
        Numeric matrix, apart from the `hash` column that identifies
        instances.\r\n Each row represents a SAT instance (not all of them
        are actually from this SAT competition, so we filter instances
        later);\r\n each column represents a solver (column names are solver
        names).\r\n The values `failed`, `unknown`, and `timeout` represent
        missing values (solver did not solve instance within cut-off
        time).\r\n- `sc2021`:\r\n Solver runtimes from the [*SAT Competition
        2021*](https://satcompetition.github.io/2021/).\r\n Numeric matrix,
        apart from the `hash` column that identifies instances.\r\n Each row
        represents a SAT instance (not all of them are actually from this SAT
        competition, so we filter instances later);\r\n each column
        represents a solver (column names are solver names).\r\n The value
        `timeout` represents missing values (solver did not solve instance
        within cut-off time).\r\n\r\n##
        `sc(2020|2021)_features.csv`\r\n\r\nPre-processed instance-feature
        data for the *Main Track* of the SAT Competitions 2020 and
        2021.\r\nOutput of the script `prepare_dataset.py`; input to the
        script `run_experiments.py`.\r\nNumeric matrix, apart from the `hash`
        column that identifies instances\r\n(we do not have any categorical
        features; they would need to be encoded beforehand).\r\nEach row
        represents a SAT instance; each column represents a feature (column
        names are feature names).\r\nHas the same number of rows as the
        corresponding runtime file.\r\nThe empty string represents missing
        values caused by the feature extractor exceeding time or memory
        limits;\r\nthese missing values are handled (imputed) in the
        prediction pipeline.\r\n\r\n##
        `sc(2020|2021)_runtimes.csv`\r\n\r\nPre-processed solver-runtime data
        from the *Main Track* of the SAT Competitions 2020 and 2021.\r\nOutput
        of the script `prepare_dataset.py`; input to the script
        `run_experiments.py`.\r\nNumeric matrix, apart from the `hash` column
        that identifies instances.\r\nEach row represents a SAT instance; each
        column represents a solver (column names are solver names).\r\nHas the
        same number of rows as the corresponding feature file.\r\nMissing
        values were replaced with the double cut-off time according to PAR-2
        scoring (= 10000).\r\n\r\n## `search_results.csv`\r\n\r\nResults of
        portfolio search, e.g., portfolios, train/test objective values, and
        search times.\r\nOutput of the script `run_experiments.py`; input to
        the script `run_evaluation.py`.\r\n\r\n- `solvers` (string, but
        actually a list of strings):\r\n List of the names of the solvers
        forming the portfolio.\r\n The solver names are column names in
        `sc(2020|2021)_runtimes.csv`.\r\n- `train_objective` (float):\r\n
        Objective value of a solution (= portfolio) to the
        `K-Portfolio-Problem`,\r\n using the SAT instances from the training
        data.\r\n The objective is defined as the PAR-2 score of the
        portfolio's virtual best solver (VBS).\r\n- `test_objective`
        (float):\r\n Objective value of the `K-Portfolio-Problem` for the SAT
        instances from the test data,\r\n i.e., take the solvers of the
        portfolio determined on the training data,\r\n and compute their VBS
        on the test instances (without running portfolio search again).\r\n-
        `(train|test)_portfolio_vws` (float):\r\n PAR-2 score of the virtual
        worst solver (VWS) formed from the portfolio,\r\n i.e., select the
        worst (in terms of PAR-2 score) solver from the portfolio for each
        instance.\r\n Might be used for comparison purposes; we do not use it
        in our evaluation.\r\n Bounds the objective value for portfolios with
        instance-specific solver selection.\r\n- `(train|test)_portfolio_sbs`
        (float):\r\n PAR-2 score of the single best solver (SBS) from the
        portfolio,\r\n i.e., the best individual solver contained in the
        portfolio.\r\n Might be used for comparison purposes; we use it in
        Figures 4 and 5 as a baseline.\r\n- `(train|test)_portfolio_sws`
        (float):\r\n PAR-2 score of the single worst solver (SWS) from the
        portfolio,\r\n i.e., the worst individual solver contained in the
        portfolio.\r\n Might be used for comparison purposes; we do not use
        it in our evaluation.\r\n- `(train|test)_global_sws` (float):\r\n
        PAR-2 score of the single worst solver (SWS) from all solvers
        (independent from current portfolio),\r\n i.e., the globally worst
        individual solver.\r\n Might be used for comparison purposes; we use
        it in Figures 1 and 2 for the submodularity-based upper bound.\r\n-
        `search_time` (float):\r\n Runtime (in seconds) of the portfolio
        search (on the particular dataset and cross-validation fold,\r\n with
        the particular portfolio-search approach).\r\n- `search_id` (int):\r\n
        Identifier denoting combinations of dataset, cross-validation fold,
        and portfolio-search approach.\r\n The experimental pipeline
        parallelizes these tasks.\r\n Each `search_id` might be associated
        with multiple portfolios;\r\n combining `search_id` and `solution_id`
        allows joining `search_results` and `prediction_results`.\r\n-
        `solution_id` (int):\r\n Identifier to distinguish between multiple
        portfolios found with\r\n a particular portfolio-search approach on a
        particular dataset and cross-validation fold.\r\n Apart from
        `mip_search` (the *optimal solution*), all search approaches yield
        multiple portfolios.\r\n- `fold_id` (int in `{0, 1, 2, 3, 4}`):\r\n
        Index of the cross-validation fold.\r\n- `problem` (string, 2
        different values):\r\n Dataset name (in our experiments: `SC2020` or
        `SC2021`).\r\n- `algorithm` (string, 4 different values):\r\n Search
        approach used to determine portfolios\r\n (in our experiments:
        `beam_search`, `kbest_search`, `mip_search`, or `random_search`).\r\n-
        `k` (int in `[1, 48]`):\r\n Desired number of solvers in the
        portfolio, an input parameter to portfolio search.\r\n The actual
        number of solvers in column `solvers` might differ,\r\n as *beam
        search* and *k-best* are only run with the maximal `k` for each
        dataset,\r\n but also yield all smaller portfolios (intermediate
        results).\r\n- `w` (int in `[1, 100]`, but stored as float):\r\n Beam
        width if *beam search* was used, or number of random samples if
        *random search* was used.\r\n Missing value (represented as an empty
        string) for the other two search approaches.\r\n Input parameter to
        portfolio search.\r\n\r\n## `prediction_results.csv`\r\n\r\nResults of
        predictions with portfolios, e.g., train/test prediction performance,
        train/test objective values, and feature importance.\r\nOutput of the
        script `run_experiments.py`; input to the script
        `run_evaluation.py`.\r\n\r\n- `model` (string, 2 different
        values):\r\n Name of the prediction model (in our experiments:
        `Random Forest` and `XGBoost`).\r\n Each prediction model is trained
        for each portfolio from the search\r\n (thus, `prediction_results`
        has twice the number of rows as `search_results`).\r\n- `pred_time`
        (float):\r\n Runtime (in seconds) for training the prediction model
        (for one portfolio) on the training data,\r\n and making predictions
        on training data as well as test data.\r\n- `(train|test)_pred_mcc`
        (float in `[-1, 1]`):\r\n Prediction performance in terms of Matthews
        Correlation Coefficient for predicting\r\n the best solver from the
        portfolio for each instance.\r\n- `(train|test)_pred_objective`
        (float):\r\n PAR-2 score of the (instance-specific) solver
        recommendations made by the prediction model.\r\n-
        `imp.<feature_name>` (float in `[0, 1]`):\r\n Feature importances
        extracted from the prediction model after training.\r\n Importances
        are normalized and should sum up to one for each row.\r\n Missing
        values (represented as empty strings) occur if no prediction model was
        trained\r\n since the prediction target only had one class, e.g.,\r\n
        portfolio had size one or always the same solver (out of multiple
        solvers) was best.\r\n- `search_id` (int), `solution_id` (int):\r\n
        Same as in `search_results.csv`, can be used for joining the
        results.\r\n\r\n## `Evaluation_console_output.txt`\r\n\r\nOutput of
        the script `run_evaluation.py`, manually copied from the console to a
        file.",
    "num_resources": 0,
    "num_tags": 5,
    "orcid": "0000-0003-0301-2798",
    "organization": {
        "approval_status": "approved",
        "created": "2023-01-12T13:30:23.238233",
        "description": "RADAR (Research Data Repository) is a
            cross-disciplinary repository for archiving and publishing research
            data from completed scientific studies and projects. The focus is on
            research data from subjects that do not yet have their own
            discipline-specific infrastructures for research data management. ",
        "id": "013c89a9-383c-4200-8baa-0f78bf1d91f9",
        "image_url": "radar-logo.svg",
        "is_organization": true,
        "name": "radar",
        "state": "active",
        "title": "RADAR",
        "type": "organization"
    },
    "owner_org": "013c89a9-383c-4200-8baa-0f78bf1d91f9",
    "private": false,
    "production_year": "2022",
    "publication_year": "2023",
    "publishers": [
        {
            "publisher": "Karlsruhe Institute of Technology"
        }
    ],
+   "related_identifiers": [
+       {
+           "identifier": "https://publikationen.bibliothek.kit.edu/1000146629",
+           "identifier_type": "URL",
+           "relation_type": "IsIdenticalTo"
+       }
+   ],
    "relationships_as_object": [],
    "relationships_as_subject": [],
    "repository_name": "RADAR (Research Data Repository)",
    "resources": [],
    "services_used_list": "",
    "source_metadata_created": "2023",
    "source_metadata_modified": "",
    "state": "active",
    "subject_areas": [
        {
            "subject_area_additional": "",
            "subject_area_name": "Computer Science"
        }
    ],
    "tags": [
        {
            "display_name": "integer programming",
            "id": "a9b29939-84d1-4165-bcd1-4896b09c1afb",
            "name": "integer programming",
            "state": "active",
            "vocabulary_id": null
        },
        {
            "display_name": "machine learning",
            "id": "9e42784b-6ee7-47e8-a69a-28b8c510212b",
            "name": "machine learning",
            "state": "active",
            "vocabulary_id": null
        },
        {
            "display_name": "propositional satisfiability",
            "id": "7839b968-6066-4ad6-b012-cc92d0dcfa9c",
            "name": "propositional satisfiability",
            "state": "active",
            "vocabulary_id": null
        },
        {
            "display_name": "runtime prediction",
            "id": "6dc3fc02-ea86-4384-a965-be4a6058c719",
            "name": "runtime prediction",
            "state": "active",
            "vocabulary_id": null
        },
        {
            "display_name": "solver portfolios",
            "id": "0f56bdf0-e6ba-43e2-b619-abf9e1c86c88",
            "name": "solver portfolios",
            "state": "active",
            "vocabulary_id": null
        }
    ],
    "title": "Experimental data for the paper \"a comprehensive study of
        k-portfolios of recent sat solvers\"",
    "type": "vdataset",
    "url": "https://doi.org/10.35097/1331"
}
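
The embedded README (in the "notes" field above) explains how to work with the archived CSV files; the sketches below illustrate a few of those steps. The README states that the raw GBD exports mark unsolved instances or failed feature extraction with the strings `failed`, `unknown`, and `timeout`. A minimal sketch of turning such an export into a purely numeric matrix with `pandas`; the path and the file name `sc2021.csv` are placeholders, since the exact file names are not listed in this record:

```python
import pandas as pd

# Placeholder path/file name for one of the raw CSV exports described above.
raw = pd.read_csv('<path>/sc2021.csv')

# Keep the 'hash' column as the instance identifier; coerce every solver
# column to numbers, turning 'failed', 'unknown', and 'timeout' into NaN.
solver_columns = [column for column in raw.columns if column != 'hash']
raw[solver_columns] = raw[solver_columns].apply(pd.to_numeric, errors='coerce')

print(raw[solver_columns].isna().sum())  # number of unsolved instances per solver
```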
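The column descriptions for `search_results.csv` define the objective as the PAR-2 score of a portfolio's virtual best solver (VBS), with SBS and VWS scores as comparison points. A sketch of recomputing these scores from `sc(2020|2021)_runtimes.csv`, under the assumption that a PAR-2 score here is the mean PAR-2 value over instances; the path and solver names are placeholders:

```python
import pandas as pd

# Placeholder path; layout as described above: a 'hash' column plus one
# PAR-2 runtime column per solver (10000 = double cut-off time for unsolved).
runtimes = pd.read_csv('<path>/sc2021_runtimes.csv').drop(columns='hash')

portfolio = ['solver_a', 'solver_b']  # placeholder solver (column) names
sub = runtimes[portfolio]

vbs_par2 = sub.min(axis=1).mean()  # virtual best solver: best portfolio solver per instance
sbs_par2 = sub.mean(axis=0).min()  # single best solver: best portfolio solver on average
vws_par2 = sub.max(axis=1).mean()  # virtual worst solver: worst portfolio solver per instance

print(vbs_par2, sbs_par2, vws_par2)
```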
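Finally, the README notes that `search_id` and `solution_id` together identify a portfolio and allow joining `search_results` and `prediction_results`. A sketch of that join, reusing the `ast.literal_eval` conversion shown in the README; the paths are placeholders:

```python
import ast

import pandas as pd

search_results = pd.read_csv('<path>/search_results.csv')
prediction_results = pd.read_csv('<path>/prediction_results.csv')

# 'solvers' is stored as the string form of a Python list (see the README).
search_results['solvers'] = search_results['solvers'].apply(ast.literal_eval)

# Each (search_id, solution_id) pair identifies one portfolio; every portfolio
# has two prediction rows (Random Forest and XGBoost), so the joined table has
# twice as many rows as search_results.
combined = search_results.merge(prediction_results, on=['search_id', 'solution_id'])
```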