2018
|
Kapidakis, S. Error Analysis on Harvesting Data over the Internet Conference 11th ACM International Conference on PErvasive Technologies Related to Assistive Environments, PETRA, ACM, 2018. @conference{Kapidakis2018,
title = {Error Analysis on Harvesting Data over the Internet},
author = {Kapidakis, S.},
url = {https://dl.acm.org/doi/abs/10.1145/3197768.3201537},
doi = {10.1145/3197768.3201537},
year = {2018},
date = {2018-06-29},
booktitle = {11th ACM International Conference on PErvasive Technologies Related to Assistive Environments, PETRA},
publisher = {ACM},
abstract = {Harvesting tasks gather information to a central repository. We studied 880560 harvesting tasks from 3446 harvesting services in 354 harvesting rounds during a period of 15 months, of which 382705 failed and the remaining tasks occasionally returning fewer records. A significant part of the Open Archive Initiative harvesting services never worked or have ceased working while many other services fail occasionally. A harvesting task includes many stages of information exchange, and each one of them may fail - but with different consequences each time. We studied the reported warning messages, the number of records returned, and the required response time to discover relations among them. We found that about half of the harvesting tasks on each harvesting round fail, and the number of failing tasks is slowly increasing. We developed a method of analysis that can be used to reverse engineering such complex network systems and to categorize the reasons of failure into useful classes. Our results do not indicate a new approach to harvesting or conclude to a breakthrough advice, but make clear the complexity of the operation in an ever changing networking environment and alarm the reader that some facts that may be considered trivial, actually they are not! They help us to better understand the risks involved, and to design more reliable procedures and improved ways to closely monitor them.},
keywords = {error analysis, harvesting data, repositories},
pubstate = {published},
tppubtype = {conference}
}
Harvesting tasks gather information to a central repository. We studied 880560 harvesting tasks from 3446 harvesting services in 354 harvesting rounds during a period of 15 months, of which 382705 failed and the remaining tasks occasionally returning fewer records. A significant part of the Open Archive Initiative harvesting services never worked or have ceased working while many other services fail occasionally. A harvesting task includes many stages of information exchange, and each one of them may fail - but with different consequences each time. We studied the reported warning messages, the number of records returned, and the required response time to discover relations among them. We found that about half of the harvesting tasks on each harvesting round fail, and the number of failing tasks is slowly increasing. We developed a method of analysis that can be used to reverse engineering such complex network systems and to categorize the reasons of failure into useful classes. Our results do not indicate a new approach to harvesting or conclude to a breakthrough advice, but make clear the complexity of the operation in an ever changing networking environment and alarm the reader that some facts that may be considered trivial, actually they are not! They help us to better understand the risks involved, and to design more reliable procedures and improved ways to closely monitor them. |
Kapidakis, S. Unexpected Errors from Metadata OAI-PMH Providers Conference Qualitative and Quantitative Methods in Libraries (QQML), vol. 7, no. 2, 2018, ISSN: 2241-1925. @conference{Kapidakis2018c,
title = {Unexpected Errors from Metadata {OAI-PMH} Providers},
author = {Kapidakis, S.},
url = {http://78.46.229.148/ojs/index.php/qqml/article/view/468},
issn = {2241-1925},
year = {2018},
date = {2018-01-18},
booktitle = {Qualitative and Quantitative Methods in Libraries (QQML)},
volume = {7},
number = {2},
abstract = {We study the behavior and the failure reason of metadata harvesting services. We used existing OAI services and we created our own OAI client to issue requests to them for many harvesting rounds, collecting the appropriate information. We studied 1407537 harvesting tasks from 3446 harvesting services in 552 harvesting rounds during a period of 2 years, of which 618812 (44%) failed and the remaining tasks occasionally returning fewer records. We examined the reported outcome messages, the number of records returned and the response time to discover failing patterns. While most messages indicate temporary errors, we revealed messages with specific details that indicate permanent affect or no effect to the returned metadata records.},
keywords = {error analysis, Metadata, OAI-PMH},
pubstate = {published},
tppubtype = {conference}
}
We study the behavior and the failure reason of metadata harvesting services. We used existing OAI services and we created our own OAI client to issue requests to them for many harvesting rounds, collecting the appropriate information. We studied 1407537 harvesting tasks from 3446 harvesting services in 552 harvesting rounds during a period of 2 years, of which 618812 (44%) failed and the remaining tasks occasionally returning fewer records. We examined the reported outcome messages, the number of records returned and the response time to discover failing patterns. While most messages indicate temporary errors, we revealed messages with specific details that indicate permanent affect or no effect to the returned metadata records. |