{"id":8487,"date":"2020-03-27T22:03:17","date_gmt":"2020-03-27T21:03:17","guid":{"rendered":"https:\/\/complex-systems-ai.com\/?page_id=8487"},"modified":"2022-12-03T23:04:55","modified_gmt":"2022-12-03T22:04:55","slug":"qualite-sur-le-nombre-de-clusters","status":"publish","type":"page","link":"https:\/\/complex-systems-ai.com\/en\/data-partitioning\/quality-over-number-of-clusters\/","title":{"rendered":"Quality on the number of clusters"},"content":{"rendered":"<div data-elementor-type=\"wp-page\" data-elementor-id=\"8487\" class=\"elementor elementor-8487\">\n\t\t\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-3cdff08 elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"3cdff08\" data-element_type=\"section\" data-e-type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-33 elementor-top-column elementor-element elementor-element-fb1521b\" data-id=\"fb1521b\" data-element_type=\"column\" data-e-type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-1c059c9 elementor-align-justify elementor-widget elementor-widget-button\" data-id=\"1c059c9\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"button.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<div class=\"elementor-button-wrapper\">\n\t\t\t\t\t<a class=\"elementor-button elementor-button-link elementor-size-sm\" href=\"https:\/\/complex-systems-ai.com\/en\/data-partitioning\/\">\n\t\t\t\t\t\t<span class=\"elementor-button-content-wrapper\">\n\t\t\t\t\t\t\t\t\t<span class=\"elementor-button-text\">Data partitioning<\/span>\n\t\t\t\t\t<\/span>\n\t\t\t\t\t<\/a>\n\t\t\t\t<\/div>\n\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t<div 
class=\"elementor-column elementor-col-33 elementor-top-column elementor-element elementor-element-af4191c\" data-id=\"af4191c\" data-element_type=\"column\" data-e-type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-978ab6d elementor-align-justify elementor-widget elementor-widget-button\" data-id=\"978ab6d\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"button.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<div class=\"elementor-button-wrapper\">\n\t\t\t\t\t<a class=\"elementor-button elementor-button-link elementor-size-sm\" href=\"https:\/\/complex-systems-ai.com\/en\/\">\n\t\t\t\t\t\t<span class=\"elementor-button-content-wrapper\">\n\t\t\t\t\t\t\t\t\t<span class=\"elementor-button-text\">Home page<\/span>\n\t\t\t\t\t<\/span>\n\t\t\t\t\t<\/a>\n\t\t\t\t<\/div>\n\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t<div class=\"elementor-column elementor-col-33 elementor-top-column elementor-element elementor-element-824e819\" data-id=\"824e819\" data-element_type=\"column\" data-e-type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-4db28f8 elementor-align-justify elementor-widget elementor-widget-button\" data-id=\"4db28f8\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"button.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<div class=\"elementor-button-wrapper\">\n\t\t\t\t\t<a class=\"elementor-button elementor-button-link elementor-size-sm\" href=\"https:\/\/en.wikipedia.org\/wiki\/Cluster_analysis\" target=\"_blank\" rel=\"noopener\">\n\t\t\t\t\t\t<span class=\"elementor-button-content-wrapper\">\n\t\t\t\t\t\t\t\t\t<span 
class=\"elementor-button-text\">Wiki<\/span>\n\t\t\t\t\t<\/span>\n\t\t\t\t\t<\/a>\n\t\t\t\t<\/div>\n\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-1d071f2b elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"1d071f2b\" data-element_type=\"section\" data-e-type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-14defe6e\" data-id=\"14defe6e\" data-element_type=\"column\" data-e-type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-681fe80d elementor-widget elementor-widget-text-editor\" data-id=\"681fe80d\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t\n<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_82_2 counter-hierarchy ez-toc-counter ez-toc-grey ez-toc-container-direction\">\n<div class=\"ez-toc-title-container\">\n<p class=\"ez-toc-title\" style=\"cursor:inherit\">Contents<\/p>\n<span class=\"ez-toc-title-toggle\"><a href=\"#\" class=\"ez-toc-pull-right ez-toc-btn ez-toc-btn-xs ez-toc-btn-default ez-toc-toggle\" aria-label=\"Toggle Table of Content\"><span class=\"ez-toc-js-icon-con\"><span class=\"\"><span class=\"eztoc-hide\" style=\"display:none;\">Toggle<\/span><span class=\"ez-toc-icon-toggle-span\"><svg style=\"fill: #999;color:#999\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" class=\"list-377408\" width=\"20px\" height=\"20px\" viewbox=\"0 0 24 24\" fill=\"none\"><path d=\"M6 6H4v2h2V6zm14 0H8v2h12V6zM4 11h2v2H4v-2zm16 0H8v2h12v-2zM4 16h2v2H4v-2zm16 0H8v2h12v-2z\" 
fill=\"currentColor\"><\/path><\/svg><svg style=\"fill: #999;color:#999\" class=\"arrow-unsorted-368013\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"10px\" height=\"10px\" viewbox=\"0 0 24 24\" version=\"1.2\" baseprofile=\"tiny\"><path d=\"M18.2 9.3l-6.2-6.3-6.2 6.3c-.2.2-.3.4-.3.7s.1.5.3.7c.2.2.4.3.7.3h11c.3 0 .5-.1.7-.3.2-.2.3-.5.3-.7s-.1-.5-.3-.7zM5.8 14.7l6.2 6.3 6.2-6.3c.2-.2.3-.5.3-.7s-.1-.5-.3-.7c-.2-.2-.4-.3-.7-.3h-11c-.3 0-.5.1-.7.3-.2.2-.3.5-.3.7s.1.5.3.7z\"\/><\/svg><\/span><\/span><\/span><\/a><\/span><\/div>\n<nav><ul class='ez-toc-list ez-toc-list-level-1' ><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-1\" href=\"https:\/\/complex-systems-ai.com\/en\/data-partitioning\/quality-over-number-of-clusters\/#Qualite-sur-le-nombre-de-clusters\" >Quality on the number of clusters<\/a><\/li><\/ul><\/nav><\/div>\n<h2><span class=\"ez-toc-section\" id=\"Qualite-sur-le-nombre-de-clusters\"><\/span>Quality on the number of clusters<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>A question closely related to cluster validation is whether the number of clusters obtained is the right one (quality on the number of clusters). This point is particularly important for algorithms which need this value as a parameter. The usual procedure is to compare the characteristics of clusterings with different numbers of clusters. Usually, internal criteria indices are used in this comparison. A graph of these indices for different numbers of clusters can show the most likely number of clusters.<\/p>\n\n<p>Some of the internal validity indices can be used for this purpose: the Calinski-Harabasz index and the <a href=\"https:\/\/complex-systems-ai.com\/en\/data-partitioning\/internal-quality-criteria\/\">Silhouette<\/a>. 
Using the intra-class dispersion matrix (S_W), other criteria can be defined (Hartigan index and Krzanowski-Lai index):<\/p>\n\n<figure class=\"wp-block-image size-large\"><img fetchpriority=\"high\" decoding=\"async\" class=\"alignnone\" src=\"https:\/\/complex-systems-ai.com\/wp-content\/uploads\/2020\/03\/eval34.png\" alt=\"Quality on the number of clusters\" width=\"424\" height=\"229\" title=\"\"><\/figure>\n\n<p>The Gap statistic estimates the number of clusters by comparing a clustering with the expected distribution of the data under the null hypothesis (no clusters). Clusterings of the data are computed for an increasing number of clusters and compared with clusterings of B reference datasets generated from a uniform distribution.<\/p>\n\n<p>The intra-class dispersion matrix S_W is calculated for both and compared. The correct number of clusters is the one where the largest difference appears between the S_W of the data and that of the uniform data (first term of the following equation):<\/p>\n\n<figure class=\"wp-block-image size-large\"><img decoding=\"async\" class=\"alignnone\" src=\"https:\/\/complex-systems-ai.com\/wp-content\/uploads\/2020\/03\/eval35.png\" alt=\"Quality on the number of clusters\" width=\"412\" height=\"63\" title=\"\"><\/figure>\n\n<p>The probable number of clusters is the smallest number that satisfies:<\/p>\n\n<figure class=\"wp-block-image size-large\"><img decoding=\"async\" class=\"alignnone\" src=\"https:\/\/complex-systems-ai.com\/wp-content\/uploads\/2020\/03\/eval36.png\" alt=\"Quality on the number of clusters\" width=\"278\" height=\"39\" title=\"\"><\/figure>\n\n<p>where s_k is defined as follows (sd_k is the standard deviation of the first term of the Gap equation):<\/p>\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" class=\"alignnone\" src=\"https:\/\/complex-systems-ai.com\/wp-content\/uploads\/2020\/03\/eval37.png\" alt=\"Quality on the number of clusters\" width=\"188\" height=\"50\" 
title=\"\"><\/figure>\n\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<\/div>","protected":false},"excerpt":{"rendered":"<p>Data Partitioning Wiki Home Page Quality on Number of Clusters A topic related to cluster validation is deciding whether the \u2026 <\/p>","protected":false},"author":1,"featured_media":0,"parent":8271,"menu_order":0,"comment_status":"closed","ping_status":"closed","template":"","meta":{"footnotes":""},"class_list":["post-8487","page","type-page","status-publish","hentry"],"amp_enabled":true,"_links":{"self":[{"href":"https:\/\/complex-systems-ai.com\/en\/wp-json\/wp\/v2\/pages\/8487","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/complex-systems-ai.com\/en\/wp-json\/wp\/v2\/pages"}],"about":[{"href":"https:\/\/complex-systems-ai.com\/en\/wp-json\/wp\/v2\/types\/page"}],"author":[{"embeddable":true,"href":"https:\/\/complex-systems-ai.com\/en\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/complex-systems-ai.com\/en\/wp-json\/wp\/v2\/comments?post=8487"}],"version-history":[{"count":8,"href":"https:\/\/complex-systems-ai.com\/en\/wp-json\/wp\/v2\/pages\/8487\/revisions"}],"predecessor-version":[{"id":19020,"href":"https:\/\/complex-systems-ai.com\/en\/wp-json\/wp\/v2\/pages\/8487\/revisions\/19020"}],"up":[{"embeddable":true,"href":"https:\/\/complex-systems-ai.com\/en\/wp-json\/wp\/v2\/pages\/8271"}],"wp:attachment":[{"href":"https:\/\/complex-systems-ai.com\/en\/wp-json\/wp\/v2\/media?parent=8487"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}