Changes
On August 4, 2023 at 8:53:45 AM UTC, admin:
-
No fields were updated. See the metadata diff for more details.
f | 1 | { | f | 1 | { |
2 | "author": "Demir, Nurullah", | 2 | "author": "Demir, Nurullah", | ||
3 | "author_email": "", | 3 | "author_email": "", | ||
4 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | 4 | "creator_user_id": "17755db4-395a-4b3b-ac09-e8e3484ca700", | ||
5 | "doi": "10.35097/1560", | 5 | "doi": "10.35097/1560", | ||
6 | "doi_date_published": "2023", | 6 | "doi_date_published": "2023", | ||
7 | "doi_publisher": "", | 7 | "doi_publisher": "", | ||
8 | "doi_status": "True", | 8 | "doi_status": "True", | ||
9 | "extra_authors": [ | 9 | "extra_authors": [ | ||
10 | { | 10 | { | ||
11 | "extra_author": "Gro\u00dfe-Kampmann, Matteo", | 11 | "extra_author": "Gro\u00dfe-Kampmann, Matteo", | ||
12 | "orcid": "" | 12 | "orcid": "" | ||
13 | }, | 13 | }, | ||
14 | { | 14 | { | ||
15 | "extra_author": "Urban, Tobias", | 15 | "extra_author": "Urban, Tobias", | ||
16 | "orcid": "" | 16 | "orcid": "" | ||
17 | }, | 17 | }, | ||
18 | { | 18 | { | ||
19 | "extra_author": "Wressnegger, Christian", | 19 | "extra_author": "Wressnegger, Christian", | ||
20 | "orcid": "" | 20 | "orcid": "" | ||
21 | }, | 21 | }, | ||
22 | { | 22 | { | ||
23 | "extra_author": "Holz, Thorsten", | 23 | "extra_author": "Holz, Thorsten", | ||
24 | "orcid": "" | 24 | "orcid": "" | ||
25 | }, | 25 | }, | ||
26 | { | 26 | { | ||
27 | "extra_author": "Pohlmann, Norbert", | 27 | "extra_author": "Pohlmann, Norbert", | ||
28 | "orcid": "" | 28 | "orcid": "" | ||
29 | } | 29 | } | ||
30 | ], | 30 | ], | ||
31 | "groups": [], | 31 | "groups": [], | ||
32 | "id": "d65cf4a0-1a31-41fd-939b-77b8c7455c82", | 32 | "id": "d65cf4a0-1a31-41fd-939b-77b8c7455c82", | ||
33 | "isopen": false, | 33 | "isopen": false, | ||
34 | "license_id": "CC BY 4.0 Attribution", | 34 | "license_id": "CC BY 4.0 Attribution", | ||
35 | "license_title": "CC BY 4.0 Attribution", | 35 | "license_title": "CC BY 4.0 Attribution", | ||
36 | "metadata_created": "2023-08-04T08:51:04.867670", | 36 | "metadata_created": "2023-08-04T08:51:04.867670", | ||
t | 37 | "metadata_modified": "2023-08-04T08:52:11.435804", | t | 37 | "metadata_modified": "2023-08-04T08:53:45.313819", |
38 | "name": "rdr-doi-10-35097-1560", | 38 | "name": "rdr-doi-10-35097-1560", | ||
39 | "notes": "Abstract: Web measurement studies can shed light on not | 39 | "notes": "Abstract: Web measurement studies can shed light on not | ||
40 | yet fully understood phenomena and thus are essential for analyzing | 40 | yet fully understood phenomena and thus are essential for analyzing | ||
41 | how the modern Web works. This often requires building new and | 41 | how the modern Web works. This often requires building new and | ||
42 | adjusting existing crawling setups, which has led to a wide variety of | 42 | adjusting existing crawling setups, which has led to a wide variety of | ||
43 | analysis tools for different (but related) aspects. If these efforts | 43 | analysis tools for different (but related) aspects. If these efforts | ||
44 | are not sufficiently documented, the reproducibility and replicability | 44 | are not sufficiently documented, the reproducibility and replicability | ||
45 | of the measurements may suffer—two properties that are crucial to | 45 | of the measurements may suffer—two properties that are crucial to | ||
46 | sustainable research.\r\nIn this paper, we survey 117 recent research | 46 | sustainable research.\r\nIn this paper, we survey 117 recent research | ||
47 | papers to derive best practices for Web-based measurement studies and | 47 | papers to derive best practices for Web-based measurement studies and | ||
48 | specify criteria that need to be met in practice.\r\nWhen applying | 48 | specify criteria that need to be met in practice.\r\nWhen applying | ||
49 | these criteria to the surveyed papers, we find that the experimental | 49 | these criteria to the surveyed papers, we find that the experimental | ||
50 | setup and other aspects essential to reproducing and replicating | 50 | setup and other aspects essential to reproducing and replicating | ||
51 | results are often missing.\r\nWe underline the criticality of this | 51 | results are often missing.\r\nWe underline the criticality of this | ||
52 | finding by performing a large-scale Web measurement study on 4.5 | 52 | finding by performing a large-scale Web measurement study on 4.5 | ||
53 | million pages with 24 different measurement setups to demonstrate the | 53 | million pages with 24 different measurement setups to demonstrate the | ||
54 | influence of the individual criteria. Our experiments show that slight | 54 | influence of the individual criteria. Our experiments show that slight | ||
55 | differences in the experimental setup directly affect the overall | 55 | differences in the experimental setup directly affect the overall | ||
56 | results and must be documented accurately and | 56 | results and must be documented accurately and | ||
57 | carefully.\r\nTechnicalRemarks: This dataset holds additional material | 57 | carefully.\r\nTechnicalRemarks: This dataset holds additional material | ||
58 | to the paper \"Reproducibility and Replicability of Web Measurement | 58 | to the paper \"Reproducibility and Replicability of Web Measurement | ||
59 | Studies\" submitted to the ACM Web Conference 2022. It contains the | 59 | Studies\" submitted to the ACM Web Conference 2022. It contains the | ||
60 | measurement data (requests, responses, visited URLs, cookies, and | 60 | measurement data (requests, responses, visited URLs, cookies, and | ||
61 | LocalStorage objects) we have collected from 25 different profiles. | 61 | LocalStorage objects) we have collected from 25 different profiles. | ||
62 | All data is in CSV format (exported from the Google BigQuery service) | 62 | All data is in CSV format (exported from the Google BigQuery service) | ||
63 | and can be imported into any database. \r\n\r\nTable sizes (according | 63 | and can be imported into any database. \r\n\r\nTable sizes (according | ||
64 | to Google BigQuery):\r\n\r\nCookies: 2.8 GB\r\nLocalStorage: 6 | 64 | to Google BigQuery):\r\n\r\nCookies: 2.8 GB\r\nLocalStorage: 6 | ||
65 | GB\r\nRequests: 626.6 GB\r\nResponses: 501.6 GB\r\nURL: 38 | 65 | GB\r\nRequests: 626.6 GB\r\nResponses: 501.6 GB\r\nURL: 38 | ||
66 | MB\r\nVisits: 935 MB\r\n\r\nNote: Although our paper does not include | 66 | MB\r\nVisits: 935 MB\r\n\r\nNote: Although our paper does not include | ||
67 | the analysis for the collected Cookie and LocalStorage objects, we | 67 | the analysis for the collected Cookie and LocalStorage objects, we | ||
68 | publish them for further studies.\r\n\r\nYou can find further | 68 | publish them for further studies.\r\n\r\nYou can find further | ||
69 | information about our study on [our repository in | 69 | information about our study on [our repository in | ||
70 | seven/Reproducibility-and-Replicability-of-Web-Measurement-Studies).", | 70 | seven/Reproducibility-and-Replicability-of-Web-Measurement-Studies).", | ||
71 | "num_resources": 0, | 71 | "num_resources": 0, | ||
72 | "num_tags": 5, | 72 | "num_tags": 5, | ||
73 | "orcid": "", | 73 | "orcid": "", | ||
74 | "organization": { | 74 | "organization": { | ||
75 | "approval_status": "approved", | 75 | "approval_status": "approved", | ||
76 | "created": "2023-01-12T13:30:23.238233", | 76 | "created": "2023-01-12T13:30:23.238233", | ||
77 | "description": "RADAR (Research Data Repository) is a | 77 | "description": "RADAR (Research Data Repository) is a | ||
78 | cross-disciplinary repository for archiving and publishing research | 78 | cross-disciplinary repository for archiving and publishing research | ||
79 | data from completed scientific studies and projects. The focus is on | 79 | data from completed scientific studies and projects. The focus is on | ||
80 | research data from subjects that do not yet have their own | 80 | research data from subjects that do not yet have their own | ||
81 | discipline-specific infrastructures for research data management. ", | 81 | discipline-specific infrastructures for research data management. ", | ||
82 | "id": "013c89a9-383c-4200-8baa-0f78bf1d91f9", | 82 | "id": "013c89a9-383c-4200-8baa-0f78bf1d91f9", | ||
83 | "image_url": "radar-logo.svg", | 83 | "image_url": "radar-logo.svg", | ||
84 | "is_organization": true, | 84 | "is_organization": true, | ||
85 | "name": "radar", | 85 | "name": "radar", | ||
86 | "state": "active", | 86 | "state": "active", | ||
87 | "title": "RADAR", | 87 | "title": "RADAR", | ||
88 | "type": "organization" | 88 | "type": "organization" | ||
89 | }, | 89 | }, | ||
90 | "owner_org": "013c89a9-383c-4200-8baa-0f78bf1d91f9", | 90 | "owner_org": "013c89a9-383c-4200-8baa-0f78bf1d91f9", | ||
91 | "private": false, | 91 | "private": false, | ||
92 | "production_year": "2021", | 92 | "production_year": "2021", | ||
93 | "publication_year": "2023", | 93 | "publication_year": "2023", | ||
94 | "publishers": [ | 94 | "publishers": [ | ||
95 | { | 95 | { | ||
96 | "publisher": "Karlsruhe Institute of Technology" | 96 | "publisher": "Karlsruhe Institute of Technology" | ||
97 | } | 97 | } | ||
98 | ], | 98 | ], | ||
99 | "relationships_as_object": [], | 99 | "relationships_as_object": [], | ||
100 | "relationships_as_subject": [], | 100 | "relationships_as_subject": [], | ||
101 | "repository_name": "RADAR (Research Data Repository)", | 101 | "repository_name": "RADAR (Research Data Repository)", | ||
102 | "resources": [], | 102 | "resources": [], | ||
103 | "services_used_list": "", | 103 | "services_used_list": "", | ||
104 | "source_metadata_created": "2023", | 104 | "source_metadata_created": "2023", | ||
105 | "source_metadata_modified": "", | 105 | "source_metadata_modified": "", | ||
106 | "state": "active", | 106 | "state": "active", | ||
107 | "subject_areas": [ | 107 | "subject_areas": [ | ||
108 | { | 108 | { | ||
109 | "subject_area_additional": "", | 109 | "subject_area_additional": "", | ||
110 | "subject_area_name": "Computer Science" | 110 | "subject_area_name": "Computer Science" | ||
111 | } | 111 | } | ||
112 | ], | 112 | ], | ||
113 | "tags": [ | 113 | "tags": [ | ||
114 | { | 114 | { | ||
115 | "display_name": "Web measurements", | 115 | "display_name": "Web measurements", | ||
116 | "id": "961a5529-8310-4de7-a41e-2ea79b836a9f", | 116 | "id": "961a5529-8310-4de7-a41e-2ea79b836a9f", | ||
117 | "name": "Web measurements", | 117 | "name": "Web measurements", | ||
118 | "state": "active", | 118 | "state": "active", | ||
119 | "vocabulary_id": null | 119 | "vocabulary_id": null | ||
120 | }, | 120 | }, | ||
121 | { | 121 | { | ||
122 | "display_name": "privacy", | 122 | "display_name": "privacy", | ||
123 | "id": "af080db8-7f8f-49a0-a429-dc93037ce39e", | 123 | "id": "af080db8-7f8f-49a0-a429-dc93037ce39e", | ||
124 | "name": "privacy", | 124 | "name": "privacy", | ||
125 | "state": "active", | 125 | "state": "active", | ||
126 | "vocabulary_id": null | 126 | "vocabulary_id": null | ||
127 | }, | 127 | }, | ||
128 | { | 128 | { | ||
129 | "display_name": "replicability", | 129 | "display_name": "replicability", | ||
130 | "id": "7c3f6d2e-28bb-4b17-9508-db293bc7a0af", | 130 | "id": "7c3f6d2e-28bb-4b17-9508-db293bc7a0af", | ||
131 | "name": "replicability", | 131 | "name": "replicability", | ||
132 | "state": "active", | 132 | "state": "active", | ||
133 | "vocabulary_id": null | 133 | "vocabulary_id": null | ||
134 | }, | 134 | }, | ||
135 | { | 135 | { | ||
136 | "display_name": "reproducibility", | 136 | "display_name": "reproducibility", | ||
137 | "id": "94c644a1-e1b6-4e9a-a828-89466ab0ff0a", | 137 | "id": "94c644a1-e1b6-4e9a-a828-89466ab0ff0a", | ||
138 | "name": "reproducibility", | 138 | "name": "reproducibility", | ||
139 | "state": "active", | 139 | "state": "active", | ||
140 | "vocabulary_id": null | 140 | "vocabulary_id": null | ||
141 | }, | 141 | }, | ||
142 | { | 142 | { | ||
143 | "display_name": "security", | 143 | "display_name": "security", | ||
144 | "id": "c3018e45-a63d-4de4-a11a-5edbc0aee325", | 144 | "id": "c3018e45-a63d-4de4-a11a-5edbc0aee325", | ||
145 | "name": "security", | 145 | "name": "security", | ||
146 | "state": "active", | 146 | "state": "active", | ||
147 | "vocabulary_id": null | 147 | "vocabulary_id": null | ||
148 | } | 148 | } | ||
149 | ], | 149 | ], | ||
150 | "title": "Reproducibility and replicability of web measurement | 150 | "title": "Reproducibility and replicability of web measurement | ||
151 | studies", | 151 | studies", | ||
152 | "type": "vdataset", | 152 | "type": "vdataset", | ||
153 | "url": "https://doi.org/10.35097/1560" | 153 | "url": "https://doi.org/10.35097/1560" | ||
154 | } | 154 | } |