Changes
On August 4, 2023 at 9:04:11 AM UTC, admin:
- No fields were updated. See the metadata diff for more details.
{
  "author": "Bach, Jakob",
  "author_email": "",
  "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700",
  "doi": "10.35097/1331",
  "doi_date_published": "2023",
  "doi_publisher": "",
  "doi_status": "True",
  "extra_authors": [
    {
      "extra_author": "Iser, Markus",
      "orcid": ""
    }
  ],
  "groups": [],
  "id": "6079d941-ec81-4518-9d15-da5c777d4790",
  "isopen": false,
  "license_id": "CC BY 4.0 Attribution",
  "license_title": "CC BY 4.0 Attribution",
  "metadata_created": "2023-08-04T08:50:33.477137",
-  "metadata_modified": "2023-08-04T08:53:29.964090",
+  "metadata_modified": "2023-08-04T09:04:11.550672",
22 | "name": "rdr-doi-10-35097-1331", | 22 | "name": "rdr-doi-10-35097-1331", | ||
23 | "notes": "Abstract: These are the experimental data for the | 23 | "notes": "Abstract: These are the experimental data for the | ||
24 | paper\r\n\r\n> Bach, Jakob, Markus Iser, and Klemens B\u00f6hm. \"A | 24 | paper\r\n\r\n> Bach, Jakob, Markus Iser, and Klemens B\u00f6hm. \"A | ||
25 | Comprehensive Study of k-Portfolios of Recent SAT | 25 | Comprehensive Study of k-Portfolios of Recent SAT | ||
26 | Solvers\"\r\n\r\npublished at the conference [*SAT | 26 | Solvers\"\r\n\r\npublished at the conference [*SAT | ||
27 | 2022*](http://satisfiability.org/SAT22/).\r\nYou can find the paper | 27 | 2022*](http://satisfiability.org/SAT22/).\r\nYou can find the paper | ||
28 | [here](https://www.doi.org/10.4230/LIPIcs.SAT.2022.2) and the code | 28 | [here](https://www.doi.org/10.4230/LIPIcs.SAT.2022.2) and the code | ||
29 | [here](https://github.com/Jakob-Bach/Small-Portfolios).\r\nSee the | 29 | [here](https://github.com/Jakob-Bach/Small-Portfolios).\r\nSee the | ||
30 | `README` for details.\r\nTechnicalRemarks: # Experimental Data for the | 30 | `README` for details.\r\nTechnicalRemarks: # Experimental Data for the | ||
31 | Paper \"A Comprehensive Study of k-Portfolios of Recent SAT | 31 | Paper \"A Comprehensive Study of k-Portfolios of Recent SAT | ||
32 | Solvers\"\r\n\r\nThese are the experimental data for the | 32 | Solvers\"\r\n\r\nThese are the experimental data for the | ||
33 | paper\r\n\r\n> Bach, Jakob, Markus Iser, and Klemens B\u00f6hm. \"A | 33 | paper\r\n\r\n> Bach, Jakob, Markus Iser, and Klemens B\u00f6hm. \"A | ||
34 | Comprehensive Study of k-Portfolios of Recent SAT | 34 | Comprehensive Study of k-Portfolios of Recent SAT | ||
35 | Solvers\"\r\n\r\naccepted at the conference [*SAT | 35 | Solvers\"\r\n\r\naccepted at the conference [*SAT | ||
36 | 2022*](http://satisfiability.org/SAT22/).\r\nCheck our [GitHub | 36 | 2022*](http://satisfiability.org/SAT22/).\r\nCheck our [GitHub | ||
37 | repository](https://github.com/Jakob-Bach/Small-Portfolios) for the | 37 | repository](https://github.com/Jakob-Bach/Small-Portfolios) for the | ||
38 | code and instructions to reproduce the experiments.\r\nThe data were | 38 | code and instructions to reproduce the experiments.\r\nThe data were | ||
39 | obtained on a server with an `AMD EPYC 7551` | 39 | obtained on a server with an `AMD EPYC 7551` | ||
40 | [CPU](https://www.amd.com/en/products/cpu/amd-epyc-7551) (32 physical | 40 | [CPU](https://www.amd.com/en/products/cpu/amd-epyc-7551) (32 physical | ||
41 | cores, base clock of 2.0 GHz) and 128 GB RAM.\r\nThe Python version | 41 | cores, base clock of 2.0 GHz) and 128 GB RAM.\r\nThe Python version | ||
42 | was `3.8.10`.\r\nWith this configuration, the experimental pipeline | 42 | was `3.8.10`.\r\nWith this configuration, the experimental pipeline | ||
43 | (`run_experiments.py`) took about 25 h.\r\n\r\nThe commit hash for the | 43 | (`run_experiments.py`) took about 25 h.\r\n\r\nThe commit hash for the | ||
44 | last run of the experimental pipeline (`run_experiments.py`) is | 44 | last run of the experimental pipeline (`run_experiments.py`) is | ||
45 | mall-Portfolios/tree/d402353e7f5804d3b693c3057f203a2c719c4098).\r\nThe | 45 | mall-Portfolios/tree/d402353e7f5804d3b693c3057f203a2c719c4098).\r\nThe | ||
46 | commit hash for the last run of the evaluation pipeline | 46 | commit hash for the last run of the evaluation pipeline | ||
47 | (`run_evaluation.py`) is | 47 | (`run_evaluation.py`) is | ||
48 | all-Portfolios/tree/5ba0468088fc0ce0ce3d2f04229946c1df83fa9d).\r\nBoth | 48 | all-Portfolios/tree/5ba0468088fc0ce0ce3d2f04229946c1df83fa9d).\r\nBoth | ||
49 | commits are also tagged.\r\n\r\nIn the following, we | 49 | commits are also tagged.\r\n\r\nIn the following, we | ||
50 | describe:\r\n\r\n- how to use the experimental data\r\n- the | 50 | describe:\r\n\r\n- how to use the experimental data\r\n- the | ||
51 | structure/content of each data file\r\n\r\n## Using the | 51 | structure/content of each data file\r\n\r\n## Using the | ||
52 | Data\r\n\r\nMost of the experimental pipeline's input and output files | 52 | Data\r\n\r\nMost of the experimental pipeline's input and output files | ||
53 | are plain CSVs.\r\nYou can easily read in any of the CSVs with | 53 | are plain CSVs.\r\nYou can easily read in any of the CSVs with | ||
54 | `pandas` if you are using Python:\r\n\r\n```python\r\nimport pandas as | 54 | `pandas` if you are using Python:\r\n\r\n```python\r\nimport pandas as | ||
55 | pd\r\n\r\nresults = | 55 | pd\r\n\r\nresults = | ||
56 | pd.read_csv('<path>/search_results.csv')\r\n```\r\n\r\nAll CSVs were | 56 | pd.read_csv('<path>/search_results.csv')\r\n```\r\n\r\nAll CSVs were | ||
57 | written with `<DataFrame>.to_csv(<path>, index=False)`, i.e., they | 57 | written with `<DataFrame>.to_csv(<path>, index=False)`, i.e., they | ||
58 | follow `pandas`' defaults for CSVs,\r\nso it is unnecessary to pass | 58 | follow `pandas`' defaults for CSVs,\r\nso it is unnecessary to pass | ||
59 | further parameters when reading them in with `pandas`.\r\nThe CSVs | 59 | further parameters when reading them in with `pandas`.\r\nThe CSVs | ||
60 | mostly contain plain numbers and strings;\r\nthe latter are only | 60 | mostly contain plain numbers and strings;\r\nthe latter are only | ||
61 | quoted if necessary, e.g., if they contain commas.\r\nHowever, the | 61 | quoted if necessary, e.g., if they contain commas.\r\nHowever, the | ||
62 | column `solvers` in `search_results.csv` contains lists of solver | 62 | column `solvers` in `search_results.csv` contains lists of solver | ||
63 | names.\r\nYou can convert this column from string to proper Python | 63 | names.\r\nYou can convert this column from string to proper Python | ||
64 | lists as follows:\r\n\r\n```python\r\nimport | 64 | lists as follows:\r\n\r\n```python\r\nimport | ||
65 | ast\r\n\r\nsearch_results['solvers'] = | 65 | ast\r\n\r\nsearch_results['solvers'] = | ||
66 | search_results['solvers'].apply(ast.literal_eval)\r\n```\r\n\r\n## | 66 | search_results['solvers'].apply(ast.literal_eval)\r\n```\r\n\r\n## | ||
67 | `*.db` files and corresponding `*.csv` files\r\n\r\nRaw | 67 | `*.db` files and corresponding `*.csv` files\r\n\r\nRaw | ||
68 | instance-feature databases and solver-runtime databases from | 68 | instance-feature databases and solver-runtime databases from | ||
69 | [GBD](https://gbd.iti.kit.edu),\r\nas well as CSV exports of | 69 | [GBD](https://gbd.iti.kit.edu),\r\nas well as CSV exports of | ||
70 | them.\r\nOutputs of the script `prepare_dataset.py`.\r\n\r\n- | 70 | them.\r\nOutputs of the script `prepare_dataset.py`.\r\n\r\n- | ||
71 | `meta`:\r\n Meta-data of SAT instances, e.g., in which competition(s) | 71 | `meta`:\r\n Meta-data of SAT instances, e.g., in which competition(s) | ||
72 | the instances were used.\r\n We only use this information to filter | 72 | the instances were used.\r\n We only use this information to filter | ||
73 | instances for the experimental datasets but not as features for | 73 | instances for the experimental datasets but not as features for | ||
74 | predictions.\r\n- `satzilla`:\r\n Instance features obtained with the | 74 | predictions.\r\n- `satzilla`:\r\n Instance features obtained with the | ||
75 | feature extractor of [*SATzilla | 75 | feature extractor of [*SATzilla | ||
76 | ubc.ca/labs/algorithms/Projects/SATzilla/Report_SAT_features.pdf).\r\n | 76 | ubc.ca/labs/algorithms/Projects/SATzilla/Report_SAT_features.pdf).\r\n | ||
77 | Numeric matrix, apart from the `hash` column that identifies | 77 | Numeric matrix, apart from the `hash` column that identifies | ||
78 | instances.\r\n Each row represents a SAT instance; each column | 78 | instances.\r\n Each row represents a SAT instance; each column | ||
79 | represents a feature (column names are feature names).\r\n The value | 79 | represents a feature (column names are feature names).\r\n The value | ||
80 | `timeout` represents missing values (feature extractor exceeded time | 80 | `timeout` represents missing values (feature extractor exceeded time | ||
81 | or memory limits).\r\n- `sc2020`:\r\n Solver runtimes from the [*SAT | 81 | or memory limits).\r\n- `sc2020`:\r\n Solver runtimes from the [*SAT | ||
82 | Competition 2020*](https://satcompetition.github.io/2020/).\r\n | 82 | Competition 2020*](https://satcompetition.github.io/2020/).\r\n | ||
83 | Numeric matrix, apart from the `hash` column that identifies | 83 | Numeric matrix, apart from the `hash` column that identifies | ||
84 | instances.\r\n Each row represents a SAT instance (not all of them | 84 | instances.\r\n Each row represents a SAT instance (not all of them | ||
85 | are actually from this SAT competition, so we filter instances | 85 | are actually from this SAT competition, so we filter instances | ||
86 | later);\r\n each column represents a solver (column names are solver | 86 | later);\r\n each column represents a solver (column names are solver | ||
87 | names).\r\n The values `failed`, `unknown`, and `timeout` represent | 87 | names).\r\n The values `failed`, `unknown`, and `timeout` represent | ||
88 | missing values (solver did not solve instance within cut-off | 88 | missing values (solver did not solve instance within cut-off | ||
89 | time).\r\n- `sc2021`:\r\n Solver runtimes from the [*SAT Competition | 89 | time).\r\n- `sc2021`:\r\n Solver runtimes from the [*SAT Competition | ||
90 | 2021*](https://satcompetition.github.io/2021/).\r\n Numeric matrix, | 90 | 2021*](https://satcompetition.github.io/2021/).\r\n Numeric matrix, | ||
91 | apart from the `hash` column that identifies instances.\r\n Each row | 91 | apart from the `hash` column that identifies instances.\r\n Each row | ||
92 | represents a SAT instance (not all of them are actually from this SAT | 92 | represents a SAT instance (not all of them are actually from this SAT | ||
93 | competition, so we filter instances later);\r\n each column | 93 | competition, so we filter instances later);\r\n each column | ||
94 | represents a solver (column names are solver names).\r\n The value | 94 | represents a solver (column names are solver names).\r\n The value | ||
95 | `timeout` represents missing values (solver did not solve instance | 95 | `timeout` represents missing values (solver did not solve instance | ||
96 | within cut-off time).\r\n\r\n## | 96 | within cut-off time).\r\n\r\n## | ||
97 | `sc(2020|2021)_features.csv`\r\n\r\nPre-processed instance-feature | 97 | `sc(2020|2021)_features.csv`\r\n\r\nPre-processed instance-feature | ||
98 | data for the *Main Track* of the SAT Competitions 2020 and | 98 | data for the *Main Track* of the SAT Competitions 2020 and | ||
99 | 2021.\r\nOutput of the script `prepare_dataset.py`; input to the | 99 | 2021.\r\nOutput of the script `prepare_dataset.py`; input to the | ||
100 | script `run_experiments.py`.\r\nNumeric matrix, apart from the `hash` | 100 | script `run_experiments.py`.\r\nNumeric matrix, apart from the `hash` | ||
101 | column that identifies instances\r\n(we do not have any categorical | 101 | column that identifies instances\r\n(we do not have any categorical | ||
102 | features; they would need to be encoded beforehand).\r\nEach row | 102 | features; they would need to be encoded beforehand).\r\nEach row | ||
103 | represents a SAT instance; each column represents a feature (column | 103 | represents a SAT instance; each column represents a feature (column | ||
104 | names are feature names).\r\nHas the same number of rows as the | 104 | names are feature names).\r\nHas the same number of rows as the | ||
105 | corresponding runtime file.\r\nThe empty string represents missing | 105 | corresponding runtime file.\r\nThe empty string represents missing | ||
106 | values caused by the feature extractor exceeding time or memory | 106 | values caused by the feature extractor exceeding time or memory | ||
107 | limits;\r\nthese missing values are handled (imputed) in the | 107 | limits;\r\nthese missing values are handled (imputed) in the | ||
108 | prediction pipeline.\r\n\r\n## | 108 | prediction pipeline.\r\n\r\n## | ||
109 | `sc(2020|2021)_runtimes.csv`\r\n\r\nPre-processed solver-runtime data | 109 | `sc(2020|2021)_runtimes.csv`\r\n\r\nPre-processed solver-runtime data | ||
110 | from the *Main Track* of the SAT Competitions 2020 and 2021.\r\nOutput | 110 | from the *Main Track* of the SAT Competitions 2020 and 2021.\r\nOutput | ||
111 | of the script `prepare_dataset.py`; input to the script | 111 | of the script `prepare_dataset.py`; input to the script | ||
112 | `run_experiments.py`.\r\nNumeric matrix, apart from the `hash` column | 112 | `run_experiments.py`.\r\nNumeric matrix, apart from the `hash` column | ||
113 | that identifies instances.\r\nEach row represents a SAT instance; each | 113 | that identifies instances.\r\nEach row represents a SAT instance; each | ||
114 | column represents a solver (column names are solver names).\r\nHas the | 114 | column represents a solver (column names are solver names).\r\nHas the | ||
115 | same number of rows as the corresponding feature file.\r\nMissing | 115 | same number of rows as the corresponding feature file.\r\nMissing | ||
116 | values were replaced with the double cut-off time according to PAR-2 | 116 | values were replaced with the double cut-off time according to PAR-2 | ||
117 | scoring (= 10000).\r\n\r\n## `search_results.csv`\r\n\r\nResults of | 117 | scoring (= 10000).\r\n\r\n## `search_results.csv`\r\n\r\nResults of | ||
118 | portfolio search, e.g., portfolios, train/test objective values, and | 118 | portfolio search, e.g., portfolios, train/test objective values, and | ||
119 | search times.\r\nOutput of the script `run_experiments.py`; input to | 119 | search times.\r\nOutput of the script `run_experiments.py`; input to | ||
120 | the script `run_evaluation.py`.\r\n\r\n- `solvers` (string, but | 120 | the script `run_evaluation.py`.\r\n\r\n- `solvers` (string, but | ||
121 | actually a list of strings):\r\n List of the names of the solvers | 121 | actually a list of strings):\r\n List of the names of the solvers | ||
122 | forming the portfolio.\r\n The solver names are column names in | 122 | forming the portfolio.\r\n The solver names are column names in | ||
123 | `sc(2020|2021)_runtimes.csv`.\r\n- `train_objective` (float):\r\n | 123 | `sc(2020|2021)_runtimes.csv`.\r\n- `train_objective` (float):\r\n | ||
124 | Objective value of a solution (= portfolio) to the | 124 | Objective value of a solution (= portfolio) to the | ||
125 | `K-Portfolio-Problem`,\r\n using the SAT instances from the training | 125 | `K-Portfolio-Problem`,\r\n using the SAT instances from the training | ||
126 | data.\r\n The objective is defined as the PAR-2 score of the | 126 | data.\r\n The objective is defined as the PAR-2 score of the | ||
127 | portfolio's virtual best solver (VBS).\r\n- `test_objective` | 127 | portfolio's virtual best solver (VBS).\r\n- `test_objective` | ||
128 | (float):\r\n Objective value of the `K-Portfolio-Problem` for the SAT | 128 | (float):\r\n Objective value of the `K-Portfolio-Problem` for the SAT | ||
129 | instances from the test data,\r\n i.e., take the solvers of the | 129 | instances from the test data,\r\n i.e., take the solvers of the | ||
130 | portfolio determined on the training data,\r\n and compute their VBS | 130 | portfolio determined on the training data,\r\n and compute their VBS | ||
131 | on the test instances (without running portfolio search again).\r\n- | 131 | on the test instances (without running portfolio search again).\r\n- | ||
132 | `(train|test)_portfolio_vws` (float):\r\n PAR-2 score of the virtual | 132 | `(train|test)_portfolio_vws` (float):\r\n PAR-2 score of the virtual | ||
133 | worst solver (VWS) formed from the portfolio,\r\n i.e., select the | 133 | worst solver (VWS) formed from the portfolio,\r\n i.e., select the | ||
134 | worst (in terms of PAR-2 score) solver from the portfolio for each | 134 | worst (in terms of PAR-2 score) solver from the portfolio for each | ||
135 | instance.\r\n Might be used for comparison purposes; we do not use it | 135 | instance.\r\n Might be used for comparison purposes; we do not use it | ||
136 | in our evaluation.\r\n Bounds the objective value for portfolios with | 136 | in our evaluation.\r\n Bounds the objective value for portfolios with | ||
137 | instance-specific solver selection.\r\n- `(train|test)_portfolio_sbs` | 137 | instance-specific solver selection.\r\n- `(train|test)_portfolio_sbs` | ||
138 | (float):\r\n PAR-2 score of the single best solver (SBS) from the | 138 | (float):\r\n PAR-2 score of the single best solver (SBS) from the | ||
139 | portfolio,\r\n i.e., the best individual solver contained in the | 139 | portfolio,\r\n i.e., the best individual solver contained in the | ||
140 | portfolio.\r\n Might be used for comparison purposes; we use it in | 140 | portfolio.\r\n Might be used for comparison purposes; we use it in | ||
141 | Figures 4 and 5 as a baseline.\r\n- `(train|test)_portfolio_sws` | 141 | Figures 4 and 5 as a baseline.\r\n- `(train|test)_portfolio_sws` | ||
142 | (float):\r\n PAR-2 score of the single worst solver (SWS) from the | 142 | (float):\r\n PAR-2 score of the single worst solver (SWS) from the | ||
143 | portfolio,\r\n i.e., the worst individual solver contained in the | 143 | portfolio,\r\n i.e., the worst individual solver contained in the | ||
144 | portfolio.\r\n Might be used for comparison purposes; we do not use | 144 | portfolio.\r\n Might be used for comparison purposes; we do not use | ||
145 | it in our evaluation.\r\n- `(train|test)_global_sws` (float):\r\n | 145 | it in our evaluation.\r\n- `(train|test)_global_sws` (float):\r\n | ||
146 | PAR-2 score of the single worst solver (SWS) from all solvers | 146 | PAR-2 score of the single worst solver (SWS) from all solvers | ||
147 | (independent from current portfolio),\r\n i.e., the globally worst | 147 | (independent from current portfolio),\r\n i.e., the globally worst | ||
148 | individual solver.\r\n Might be used for comparison purposes; we use | 148 | individual solver.\r\n Might be used for comparison purposes; we use | ||
149 | it in Figures 1 and 2 for the submodularity-based upper bound.\r\n- | 149 | it in Figures 1 and 2 for the submodularity-based upper bound.\r\n- | ||
150 | `search_time` (float):\r\n Runtime (in seconds) of the portfolio | 150 | `search_time` (float):\r\n Runtime (in seconds) of the portfolio | ||
151 | search (on the particular dataset and cross-validation fold,\r\n with | 151 | search (on the particular dataset and cross-validation fold,\r\n with | ||
152 | the particular portfolio-search approach).\r\n- `search_id` (int):\r\n | 152 | the particular portfolio-search approach).\r\n- `search_id` (int):\r\n | ||
153 | Identifier denoting combinations of dataset, cross-validation fold, | 153 | Identifier denoting combinations of dataset, cross-validation fold, | ||
154 | and portfolio-search approach.\r\n The experimental pipeline | 154 | and portfolio-search approach.\r\n The experimental pipeline | ||
155 | parallelizes these tasks.\r\n Each `search_id` might be associated | 155 | parallelizes these tasks.\r\n Each `search_id` might be associated | ||
156 | with multiple portfolios;\r\n combining `search_id` and `solution_id` | 156 | with multiple portfolios;\r\n combining `search_id` and `solution_id` | ||
157 | allows joining `search_results` and `prediction_results`.\r\n- | 157 | allows joining `search_results` and `prediction_results`.\r\n- | ||
158 | `solution_id` (int):\r\n Identifier to distinguish between multiple | 158 | `solution_id` (int):\r\n Identifier to distinguish between multiple | ||
159 | portfolios found with\r\n a particular portfolio-search approach on a | 159 | portfolios found with\r\n a particular portfolio-search approach on a | ||
160 | particular dataset and cross-validation fold.\r\n Apart from | 160 | particular dataset and cross-validation fold.\r\n Apart from | ||
161 | `mip_search` (the *optimal solution*), all search approaches yield | 161 | `mip_search` (the *optimal solution*), all search approaches yield | ||
162 | multiple portfolios.\r\n- `fold_id` (int in `{0, 1, 2, 3, 4}`):\r\n | 162 | multiple portfolios.\r\n- `fold_id` (int in `{0, 1, 2, 3, 4}`):\r\n | ||
163 | Index of the cross-validation fold.\r\n- `problem` (string, 2 | 163 | Index of the cross-validation fold.\r\n- `problem` (string, 2 | ||
164 | different values):\r\n Dataset name (in our experiments: `SC2020` or | 164 | different values):\r\n Dataset name (in our experiments: `SC2020` or | ||
165 | `SC2021`).\r\n- `algorithm` (string, 4 different values):\r\n Search | 165 | `SC2021`).\r\n- `algorithm` (string, 4 different values):\r\n Search | ||
166 | approach used to determine portfolios\r\n (in our experiments: | 166 | approach used to determine portfolios\r\n (in our experiments: | ||
167 | `beam_search`, `kbest_search`, `mip_search`, or `random_search`).\r\n- | 167 | `beam_search`, `kbest_search`, `mip_search`, or `random_search`).\r\n- | ||
168 | `k` (int in `[1, 48]`):\r\n Desired number of solvers in the | 168 | `k` (int in `[1, 48]`):\r\n Desired number of solvers in the | ||
169 | portfolio, an input parameter to portfolio search.\r\n The actual | 169 | portfolio, an input parameter to portfolio search.\r\n The actual | ||
170 | number of solvers in column `solvers` might differ,\r\n as *beam | 170 | number of solvers in column `solvers` might differ,\r\n as *beam | ||
171 | search* and *k-best* are only run with the maximal `k` for each | 171 | search* and *k-best* are only run with the maximal `k` for each | ||
172 | dataset,\r\n but also yield all smaller portfolios (intermediate | 172 | dataset,\r\n but also yield all smaller portfolios (intermediate | ||
173 | results).\r\n- `w` (int in `[1, 100]`, but stored as float):\r\n Beam | 173 | results).\r\n- `w` (int in `[1, 100]`, but stored as float):\r\n Beam | ||
174 | width if *beam search* was used, or number of random samples if | 174 | width if *beam search* was used, or number of random samples if | ||
175 | *random search* was used.\r\n Missing value (represented as an empty | 175 | *random search* was used.\r\n Missing value (represented as an empty | ||
176 | string) for the other two search approaches.\r\n Input parameter to | 176 | string) for the other two search approaches.\r\n Input parameter to | ||
177 | portfolio search.\r\n\r\n## `prediction_results.csv`\r\n\r\nResults of | 177 | portfolio search.\r\n\r\n## `prediction_results.csv`\r\n\r\nResults of | ||
178 | predictions with portfolios, e.g., train/test prediction performance, | 178 | predictions with portfolios, e.g., train/test prediction performance, | ||
179 | train/test objective values, and feature importance.\r\nOutput of the | 179 | train/test objective values, and feature importance.\r\nOutput of the | ||
180 | script `run_experiments.py`; input to the script | 180 | script `run_experiments.py`; input to the script | ||
181 | `run_evaluation.py`.\r\n\r\n- `model` (string, 2 different | 181 | `run_evaluation.py`.\r\n\r\n- `model` (string, 2 different | ||
182 | values):\r\n Name of the prediction model (in our experiments: | 182 | values):\r\n Name of the prediction model (in our experiments: | ||
183 | `Random Forest` and `XGBoost`).\r\n Each prediction model is trained | 183 | `Random Forest` and `XGBoost`).\r\n Each prediction model is trained | ||
184 | for each portfolio from the search\r\n (thus, `prediction_results` | 184 | for each portfolio from the search\r\n (thus, `prediction_results` | ||
185 | has twice the number of rows as `search_results`).\r\n- `pred_time` | 185 | has twice the number of rows as `search_results`).\r\n- `pred_time` | ||
186 | (float):\r\n Runtime (in seconds) for training the prediction model | 186 | (float):\r\n Runtime (in seconds) for training the prediction model | ||
187 | (for one portfolio) on the training data,\r\n and making predictions | 187 | (for one portfolio) on the training data,\r\n and making predictions | ||
188 | on training data as well as test data.\r\n- `(train|test)_pred_mcc` | 188 | on training data as well as test data.\r\n- `(train|test)_pred_mcc` | ||
189 | (float in `[-1, 1]`):\r\n Prediction performance in terms of Matthews | 189 | (float in `[-1, 1]`):\r\n Prediction performance in terms of Matthews | ||
190 | Correlation Coefficient for predicting\r\n the best solver from the | 190 | Correlation Coefficient for predicting\r\n the best solver from the | ||
191 | portfolio for each instance.\r\n- `(train|test)_pred_objective` | 191 | portfolio for each instance.\r\n- `(train|test)_pred_objective` | ||
192 | (float):\r\n PAR-2 score of the (instance-specific) solver | 192 | (float):\r\n PAR-2 score of the (instance-specific) solver | ||
193 | recommendations made by the prediction model.\r\n- | 193 | recommendations made by the prediction model.\r\n- | ||
194 | `imp.<feature_name>` (float in `[0, 1]`):\r\n Feature importances | 194 | `imp.<feature_name>` (float in `[0, 1]`):\r\n Feature importances | ||
195 | extracted from the prediction model after training.\r\n Importances | 195 | extracted from the prediction model after training.\r\n Importances | ||
196 | are normalized and should sum up to one for each row. \r\n Missing | 196 | are normalized and should sum up to one for each row. \r\n Missing | ||
197 | values (represented as empty strings) occur if no prediction model was | 197 | values (represented as empty strings) occur if no prediction model was | ||
198 | trained\r\n since the prediction target only had one class, e.g.,\r\n | 198 | trained\r\n since the prediction target only had one class, e.g.,\r\n | ||
199 | portfolio had size one or always the same solver (out of multiple | 199 | portfolio had size one or always the same solver (out of multiple | ||
200 | solvers) was best.\r\n- `search_id` (int), `solution_id` (int):\r\n | 200 | solvers) was best.\r\n- `search_id` (int), `solution_id` (int):\r\n | ||
201 | Same as in `search_results.csv`, can be used for joining the | 201 | Same as in `search_results.csv`, can be used for joining the | ||
202 | results.\r\n\r\n## `Evaluation_console_output.txt`\r\n\r\nOutput of | 202 | results.\r\n\r\n## `Evaluation_console_output.txt`\r\n\r\nOutput of | ||
203 | the script `run_evaluation.py`, manually copied from the console to a | 203 | the script `run_evaluation.py`, manually copied from the console to a | ||
204 | file.", | 204 | file.", | ||
205 | "num_resources": 0, | 205 | "num_resources": 0, | ||
206 | "num_tags": 5, | 206 | "num_tags": 5, | ||
207 | "orcid": "0000-0003-0301-2798", | 207 | "orcid": "0000-0003-0301-2798", | ||
208 | "organization": { | 208 | "organization": { | ||
209 | "approval_status": "approved", | 209 | "approval_status": "approved", | ||
210 | "created": "2023-01-12T13:30:23.238233", | 210 | "created": "2023-01-12T13:30:23.238233", | ||
211 | "description": "RADAR (Research Data Repository) is a | 211 | "description": "RADAR (Research Data Repository) is a | ||
212 | cross-disciplinary repository for archiving and publishing research | 212 | cross-disciplinary repository for archiving and publishing research | ||
213 | data from completed scientific studies and projects. The focus is on | 213 | data from completed scientific studies and projects. The focus is on | ||
214 | research data from subjects that do not yet have their own | 214 | research data from subjects that do not yet have their own | ||
215 | discipline-specific infrastructures for research data management. ", | 215 | discipline-specific infrastructures for research data management. ", | ||
216 | "id": "013c89a9-383c-4200-8baa-0f78bf1d91f9", | 216 | "id": "013c89a9-383c-4200-8baa-0f78bf1d91f9", | ||
217 | "image_url": "radar-logo.svg", | 217 | "image_url": "radar-logo.svg", | ||
218 | "is_organization": true, | 218 | "is_organization": true, | ||
219 | "name": "radar", | 219 | "name": "radar", | ||
220 | "state": "active", | 220 | "state": "active", | ||
221 | "title": "RADAR", | 221 | "title": "RADAR", | ||
222 | "type": "organization" | 222 | "type": "organization" | ||
223 | }, | 223 | }, | ||
224 | "owner_org": "013c89a9-383c-4200-8baa-0f78bf1d91f9", | 224 | "owner_org": "013c89a9-383c-4200-8baa-0f78bf1d91f9", | ||
225 | "private": false, | 225 | "private": false, | ||
226 | "production_year": "2022", | 226 | "production_year": "2022", | ||
227 | "publication_year": "2023", | 227 | "publication_year": "2023", | ||
228 | "publishers": [ | 228 | "publishers": [ | ||
229 | { | 229 | { | ||
230 | "publisher": "Karlsruhe Institute of Technology" | 230 | "publisher": "Karlsruhe Institute of Technology" | ||
231 | } | 231 | } | ||
232 | ], | 232 | ], | ||
233 | "relationships_as_object": [], | 233 | "relationships_as_object": [], | ||
234 | "relationships_as_subject": [], | 234 | "relationships_as_subject": [], | ||
235 | "repository_name": "RADAR (Research Data Repository)", | 235 | "repository_name": "RADAR (Research Data Repository)", | ||
236 | "resources": [], | 236 | "resources": [], | ||
237 | "services_used_list": "", | 237 | "services_used_list": "", | ||
238 | "source_metadata_created": "2023", | 238 | "source_metadata_created": "2023", | ||
239 | "source_metadata_modified": "", | 239 | "source_metadata_modified": "", | ||
240 | "state": "active", | 240 | "state": "active", | ||
241 | "subject_areas": [ | 241 | "subject_areas": [ | ||
242 | { | 242 | { | ||
243 | "subject_area_additional": "", | 243 | "subject_area_additional": "", | ||
244 | "subject_area_name": "Computer Science" | 244 | "subject_area_name": "Computer Science" | ||
245 | } | 245 | } | ||
246 | ], | 246 | ], | ||
247 | "tags": [ | 247 | "tags": [ | ||
248 | { | 248 | { | ||
249 | "display_name": "integer programming", | 249 | "display_name": "integer programming", | ||
250 | "id": "a9b29939-84d1-4165-bcd1-4896b09c1afb", | 250 | "id": "a9b29939-84d1-4165-bcd1-4896b09c1afb", | ||
251 | "name": "integer programming", | 251 | "name": "integer programming", | ||
252 | "state": "active", | 252 | "state": "active", | ||
253 | "vocabulary_id": null | 253 | "vocabulary_id": null | ||
254 | }, | 254 | }, | ||
255 | { | 255 | { | ||
256 | "display_name": "machine learning", | 256 | "display_name": "machine learning", | ||
257 | "id": "9e42784b-6ee7-47e8-a69a-28b8c510212b", | 257 | "id": "9e42784b-6ee7-47e8-a69a-28b8c510212b", | ||
258 | "name": "machine learning", | 258 | "name": "machine learning", | ||
259 | "state": "active", | 259 | "state": "active", | ||
260 | "vocabulary_id": null | 260 | "vocabulary_id": null | ||
261 | }, | 261 | }, | ||
262 | { | 262 | { | ||
263 | "display_name": "propositional satisfiability", | 263 | "display_name": "propositional satisfiability", | ||
264 | "id": "7839b968-6066-4ad6-b012-cc92d0dcfa9c", | 264 | "id": "7839b968-6066-4ad6-b012-cc92d0dcfa9c", | ||
265 | "name": "propositional satisfiability", | 265 | "name": "propositional satisfiability", | ||
266 | "state": "active", | 266 | "state": "active", | ||
267 | "vocabulary_id": null | 267 | "vocabulary_id": null | ||
268 | }, | 268 | }, | ||
269 | { | 269 | { | ||
270 | "display_name": "runtime prediction", | 270 | "display_name": "runtime prediction", | ||
271 | "id": "6dc3fc02-ea86-4384-a965-be4a6058c719", | 271 | "id": "6dc3fc02-ea86-4384-a965-be4a6058c719", | ||
272 | "name": "runtime prediction", | 272 | "name": "runtime prediction", | ||
273 | "state": "active", | 273 | "state": "active", | ||
274 | "vocabulary_id": null | 274 | "vocabulary_id": null | ||
275 | }, | 275 | }, | ||
276 | { | 276 | { | ||
277 | "display_name": "solver portfolios", | 277 | "display_name": "solver portfolios", | ||
278 | "id": "0f56bdf0-e6ba-43e2-b619-abf9e1c86c88", | 278 | "id": "0f56bdf0-e6ba-43e2-b619-abf9e1c86c88", | ||
279 | "name": "solver portfolios", | 279 | "name": "solver portfolios", | ||
280 | "state": "active", | 280 | "state": "active", | ||
281 | "vocabulary_id": null | 281 | "vocabulary_id": null | ||
282 | } | 282 | } | ||
283 | ], | 283 | ], | ||
284 | "title": "Experimental data for the paper \"a comprehensive study of | 284 | "title": "Experimental data for the paper \"a comprehensive study of | ||
285 | k-portfolios of recent sat solvers\"", | 285 | k-portfolios of recent sat solvers\"", | ||
286 | "type": "vdataset", | 286 | "type": "vdataset", | ||
287 | "url": "https://doi.org/10.35097/1331" | 287 | "url": "https://doi.org/10.35097/1331" | ||
288 | } | 288 | } |
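The notes above describe how the result CSVs relate to each other: `search_results.csv` and `prediction_results.csv` can be joined on `search_id` and `solution_id`, the `solvers` column stores Python lists as strings, and a portfolio's objective is the PAR-2 score of its virtual best solver over the runtimes matrix. The following is a minimal sketch of those steps, not part of the archived documentation; the file paths, the `SC2021` filter, and the choice of the first portfolio are assumptions for illustration, and the recomputed score uses all instances in the runtimes file rather than the cross-validation folds behind `train_objective`/`test_objective`.

```python
# Hedged sketch based on the file descriptions in the dataset notes above;
# paths and the 'SC2021' filter are assumptions, not part of the original pipeline.
import ast

import pandas as pd

# All CSVs were written with <DataFrame>.to_csv(<path>, index=False),
# so pandas' default settings are sufficient for reading them back in.
search_results = pd.read_csv('search_results.csv')
prediction_results = pd.read_csv('prediction_results.csv')
runtimes = pd.read_csv('sc2021_runtimes.csv')  # 'hash' column + one column per solver

# 'solvers' is stored as the string representation of a Python list.
search_results['solvers'] = search_results['solvers'].apply(ast.literal_eval)

# 'search_id' and 'solution_id' together identify a portfolio in both tables.
results = search_results.merge(prediction_results, on=['search_id', 'solution_id'])


def vbs_par2(runtime_matrix, portfolio):
    """PAR-2 score of the portfolio's virtual best solver: per instance, take the
    minimum runtime over the portfolio's solvers (timeouts already count as 10000),
    then average over all instances."""
    return runtime_matrix[portfolio].min(axis=1).mean()


# Example: recompute the VBS PAR-2 of the first SC2021 portfolio over all instances.
sc2021_portfolios = results[results['problem'] == 'SC2021']
print(vbs_par2(runtimes, sc2021_portfolios['solvers'].iloc[0]))
```

Since the runtimes files already encode unsolved instances as the PAR-2 penalty of 10000, no extra handling of missing values is needed in this computation.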