Cite Article

Schema Matching for Large-Scale Data Based on Ontology Clustering Method

Choose citation format

BibTeX

@article{IJASEIT2133,
   author = {Harith Oraibi Alani and Saidah Saad},
   title = {Schema Matching for Large-Scale Data Based on Ontology Clustering Method},
   journal = {International Journal on Advanced Science, Engineering and Information Technology},
   volume = {7},
   number = {5},
   year = {2017},
   pages = {1790--1797},
   keywords = {automatic schema matching; large-scale data; ontology; clustering; web interfaces},
   abstract = {Holistic schema matching is the process of identifying semantic correspondences among multiple schemas at once. The key challenge behind holistic schema matching lies in selecting an appropriate method that has the ability to maintain effectiveness and efficiency. Effectiveness refers to the quality of matching while efficiency refers to the time and memory consumed within the matching process. Several approaches have been proposed for holistic schema matching. These approaches were mainly dependent on clustering techniques. In fact, clustering aims to group the similar fields within the schemas in multiple groups or clusters. However, fields on schemas contain much complicated semantic relations due to schema level. Ontology which is a hierarchy of taxonomies, has the ability to identify semantic correspondences with various levels. Hence, this study aims to propose an ontology-based clustering approach for holistic schema matching. Two datasets have been used from ICQ query interfaces consisting of 40 interfaces, which refer to Airfare and Job. The ontology used in this study has been built using the XBenchMatch which is a benchmark lexicon that contains rich semantic correspondences for the field of schema matching. In order to accommodate the schema matching using the ontology, a rule-based clustering approach is used with multiple distance measures including Dice, Cosine and Jaccard. The evaluation has been conducted using the common information retrieval metrics; precision, recall and f-measure. In order to assess the performance of the proposed ontology-based clustering, a comparison among two experiments has been performed. The first experiment aims to conduct the ontology-based clustering approach (i.e. using ontology and rule-based clustering), while the second experiment aims to conduct the traditional clustering approaches without the use of ontology. Results show that the proposed ontology-based clustering approach has outperformed the traditional clustering approaches without ontology by achieving an f-measure of 94% for Airfare and 92% for Job datasets. This emphasizes the strength of ontology in terms of identifying correspondences with semantic level variation.},
   issn = {2088-5334},
   publisher = {INSIGHT - Indonesian Society for Knowledge and Human Development},
   url = {http://ijaseit.insightsociety.org/index.php?option=com_content&view=article&id=9&Itemid=1&article_id=2133},
   doi = {10.18517/ijaseit.7.5.2133}
}

EndNote

%A Alani, Harith Oraibi
%A Saad, Saidah
%D 2017
%T Schema Matching for Large-Scale Data Based on Ontology Clustering Method
%B 2017
%9 automatic schema matching; large-scale data; ontology; clustering; web interfaces
%! Schema Matching for Large-Scale Data Based on Ontology Clustering Method
%K automatic schema matching; large-scale data; ontology; clustering; web interfaces
%X Holistic schema matching is the process of identifying semantic correspondences among multiple schemas at once. The key challenge behind holistic schema matching lies in selecting an appropriate method that has the ability to maintain effectiveness and efficiency. Effectiveness refers to the quality of matching while efficiency refers to the time and memory consumed within the matching process. Several approaches have been proposed for holistic schema matching. These approaches were mainly dependent on clustering techniques. In fact, clustering aims to group the similar fields within the schemas in multiple groups or clusters. However, fields on schemas contain much complicated semantic relations due to schema level. Ontology which is a hierarchy of taxonomies, has the ability to identify semantic correspondences with various levels. Hence, this study aims to propose an ontology-based clustering approach for holistic schema matching. Two datasets have been used from ICQ query interfaces consisting of 40 interfaces, which refer to Airfare and Job. The ontology used in this study has been built using the XBenchMatch which is a benchmark lexicon that contains rich semantic correspondences for the field of schema matching. In order to accommodate the schema matching using the ontology, a rule-based clustering approach is used with multiple distance measures including Dice, Cosine and Jaccard. The evaluation has been conducted using the common information retrieval metrics; precision, recall and f-measure. In order to assess the performance of the proposed ontology-based clustering, a comparison among two experiments has been performed. The first experiment aims to conduct the ontology-based clustering approach (i.e. using ontology and rule-based clustering), while the second experiment aims to conduct the traditional clustering approaches without the use of ontology. Results show that the proposed ontology-based clustering approach has outperformed the traditional clustering approaches without ontology by achieving an f-measure of 94% for Airfare and 92% for Job datasets. This emphasizes the strength of ontology in terms of identifying correspondences with semantic level variation.
%U http://ijaseit.insightsociety.org/index.php?option=com_content&view=article&id=9&Itemid=1&article_id=2133
%R doi:10.18517/ijaseit.7.5.2133
%J International Journal on Advanced Science, Engineering and Information Technology
%V 7
%N 5
%@ 2088-5334

IEEE

Harith Oraibi Alani and Saidah Saad,"Schema Matching for Large-Scale Data Based on Ontology Clustering Method," International Journal on Advanced Science, Engineering and Information Technology, vol. 7, no. 5, pp. 1790-1797, 2017. [Online]. Available: http://dx.doi.org/10.18517/ijaseit.7.5.2133.

RefMan/ProCite (RIS)

TY  - JOUR
AU  - Alani, Harith Oraibi
AU  - Saad, Saidah
PY  - 2017
TI  - Schema Matching for Large-Scale Data Based on Ontology Clustering Method
JF  - International Journal on Advanced Science, Engineering and Information Technology; Vol. 7 (2017) No. 5
Y2  - 2017
SP  - 1790
EP  - 1797
SN  - 2088-5334
PB  - INSIGHT - Indonesian Society for Knowledge and Human Development
KW  - automatic schema matching; large-scale data; ontology; clustering; web interfaces
N2  - Holistic schema matching is the process of identifying semantic correspondences among multiple schemas at once. The key challenge behind holistic schema matching lies in selecting an appropriate method that has the ability to maintain effectiveness and efficiency. Effectiveness refers to the quality of matching while efficiency refers to the time and memory consumed within the matching process. Several approaches have been proposed for holistic schema matching. These approaches were mainly dependent on clustering techniques. In fact, clustering aims to group the similar fields within the schemas in multiple groups or clusters. However, fields on schemas contain much complicated semantic relations due to schema level. Ontology which is a hierarchy of taxonomies, has the ability to identify semantic correspondences with various levels. Hence, this study aims to propose an ontology-based clustering approach for holistic schema matching. Two datasets have been used from ICQ query interfaces consisting of 40 interfaces, which refer to Airfare and Job. The ontology used in this study has been built using the XBenchMatch which is a benchmark lexicon that contains rich semantic correspondences for the field of schema matching. In order to accommodate the schema matching using the ontology, a rule-based clustering approach is used with multiple distance measures including Dice, Cosine and Jaccard. The evaluation has been conducted using the common information retrieval metrics; precision, recall and f-measure. In order to assess the performance of the proposed ontology-based clustering, a comparison among two experiments has been performed. The first experiment aims to conduct the ontology-based clustering approach (i.e. using ontology and rule-based clustering), while the second experiment aims to conduct the traditional clustering approaches without the use of ontology. Results show that the proposed ontology-based clustering approach has outperformed the traditional clustering approaches without ontology by achieving an f-measure of 94% for Airfare and 92% for Job datasets. This emphasizes the strength of ontology in terms of identifying correspondences with semantic level variation.
UR  - http://ijaseit.insightsociety.org/index.php?option=com_content&view=article&id=9&Itemid=1&article_id=2133
DO  - 10.18517/ijaseit.7.5.2133

RefWorks

RT Journal Article
ID 2133
A1 Alani, Harith Oraibi
A1 Saad, Saidah
T1 Schema Matching for Large-Scale Data Based on Ontology Clustering Method
JF International Journal on Advanced Science, Engineering and Information Technology
VO 7
IS 5
YR 2017
SP 1790
OP 1797
SN 2088-5334
PB INSIGHT - Indonesian Society for Knowledge and Human Development
K1 automatic schema matching; large-scale data; ontology; clustering; web interfaces
AB Holistic schema matching is the process of identifying semantic correspondences among multiple schemas at once. The key challenge behind holistic schema matching lies in selecting an appropriate method that has the ability to maintain effectiveness and efficiency. Effectiveness refers to the quality of matching while efficiency refers to the time and memory consumed within the matching process. Several approaches have been proposed for holistic schema matching. These approaches were mainly dependent on clustering techniques. In fact, clustering aims to group the similar fields within the schemas in multiple groups or clusters. However, fields on schemas contain much complicated semantic relations due to schema level. Ontology which is a hierarchy of taxonomies, has the ability to identify semantic correspondences with various levels. Hence, this study aims to propose an ontology-based clustering approach for holistic schema matching. Two datasets have been used from ICQ query interfaces consisting of 40 interfaces, which refer to Airfare and Job. The ontology used in this study has been built using the XBenchMatch which is a benchmark lexicon that contains rich semantic correspondences for the field of schema matching. In order to accommodate the schema matching using the ontology, a rule-based clustering approach is used with multiple distance measures including Dice, Cosine and Jaccard. The evaluation has been conducted using the common information retrieval metrics; precision, recall and f-measure. In order to assess the performance of the proposed ontology-based clustering, a comparison among two experiments has been performed. The first experiment aims to conduct the ontology-based clustering approach (i.e. using ontology and rule-based clustering), while the second experiment aims to conduct the traditional clustering approaches without the use of ontology. Results show that the proposed ontology-based clustering approach has outperformed the traditional clustering approaches without ontology by achieving an f-measure of 94% for Airfare and 92% for Job datasets. This emphasizes the strength of ontology in terms of identifying correspondences with semantic level variation.
LK http://ijaseit.insightsociety.org/index.php?option=com_content&view=article&id=9&Itemid=1&article_id=2133
DO  - 10.18517/ijaseit.7.5.2133