Finding count of duplicate numbers in a sorted array


The algorithm for finding the count of duplicate elements makes use of binary search to find the first occurrence and the last occurrence of the element to be searched.

  • Finding the first occurrence
    If the searched element located at index mid and its previous element (i.e at index mid-1) match, binary search continues in the sorted space to the left side of index mid i.e from index beg till index mid-1. This way the search continues until the previous element is the same as the element to be searched. When the search terminates we get the index of the first occurrence.

  • Finding the last occurrence
    If the searched element located at index mid and its next element (i.e at index mid+1) matches the searched value, the search continues in the sorted space to the right side of mid i.e from index mid+1 till index end. When the search terminates we get the index of the last occurrence.

Β Β Β Β Β Β Thus, count of duplicate elements = Index of the last occurrence - Index of the first occurrence + 1


Example

Binary_Search_Duplicates

Time complexity : Log ( N ), as we use the binary search algorithm at the core for finding the count of duplicate numbers.

Why is mid calculated as mid = beg + (end-beg)/2 ?


Program for finding the count of duplicate numbers within a sorted array using binary search

def First ( array, n ) :
    
    beg = 0
    end = len(array) - 1

    while (beg <= end) :

        mid = int (beg + (end-beg)/2)

        if (array[mid] == n) :
            if (mid-1 >= 0 and array[mid-1] == n) :
               end = mid-1
               continue

            return mid 

        elif (array[mid] < n) :
            beg = mid + 1 
        else :
            end = mid - 1 

    return -1  

def Last (array, n) :
    
    beg = 0
    end = len(array)-1

    while (beg <= end) :

        mid = int(beg + (end-beg)/2)

        if (array[mid] == n) :
            if (mid+1 < len(array) and array[mid+1] == n) :
               beg = mid + 1 
               continue
            return mid 

        elif (array[mid] < n) :
            beg = mid + 1 
        else :
            end = mid - 1

    return -1; 

array = [ 1, 2, 3, 9, 9, 9, 9, 10, 10, 12, 13 ]
n = int (input("Enter the number : "))

first_index = First (array, n)
last_index  = Last (array, n)

if (first_index == -1 or last_index == - 1) :
    print("Element does not exist")
else :
    print("First occurrence of " + str(n) + " is at index : " + str(first_index))
    print("Last occurrence of " + str(n) + " is at index : " + str(last_index))
    print("Total count : "+ str(last_index - first_index + 1)) 

Output

Enter the number : 9
First occurrence of 9 is at index : 3
Last occurrence of 9 is at index : 6
Total count : 4

Enter the number : 10
First occurrence of 10 is at index : 7
Last occurrence of 10 is at index : 8
Total count : 2

Enter the number : 5
Element does not exist

Enter the number : 12
First occurrence of 12 is at index : 9
Last occurrence of 12 is at index : 9
Total count : 1

#include<iostream>
#include<vector>

using namespace std;

int First (vector<int>& vec, int n) {

    int beg = 0;
    int end = vec.size() - 1;

    while (beg <= end) {

        int mid = beg + (end-beg)/2;

        if (vec[mid] == n) {
            if (mid-1 >= 0 and vec[mid-1] == n) {
               end = mid-1;
               continue;
            }
            return mid;
        }

        else if (vec[mid] < n)
            beg = mid + 1;
        else
            end = mid - 1;
    }
    return -1;
}

int Last (vector<int>& vec, int n) {

    int beg = 0;
    int end = vec.size()-1;

    while (beg <= end) {

        int mid = beg + (end-beg)/2;

        if (vec[mid] == n) {
            if (mid+1 < vec.size() and  vec[mid+1] == n) {
               beg = mid + 1;
               continue;
            }
            return mid;
        }

        else if (vec[mid] < n)
            beg = mid + 1;
        else
            end = mid - 1;
    }
    return -1;
}

int main() {
   vector<int> vec = { 1, 2, 3, 9, 9, 9, 9, 10, 10, 12, 13 };

   int first_index, last_index, n;
   cout << "Enter the number : ";
   cin >> n;

   first_index = First (vec, n);
   last_index =  Last (vec, n);

   if (first_index == -1 or last_index == - 1) {
       cout << "Element does not exist" << endl;
   } else {
       cout << "First occurrence of " << n << " is at index : " << first_index << endl;
       cout << "Last occurrence of " << n << " is at index : " << last_index << endl;
       cout << "Total count : " << last_index - first_index + 1;
   }
   return 0;
}

Output

Enter the number : 13
First occurrence of 13 is at index : 10
Last occurrence of 13 is at index : 10
Total count : 1

Enter the number : 8
Element does not exist

Enter the number : 5
Element does not exist

Enter the number : 3
First occurrence of 3 is at index : 2
Last occurrence of 3 is at index : 2
Total count : 1

Enter the number : 9
First occurrence of 9 is at index : 3
Last occurrence of 9 is at index : 6
Total count : 4

public class FindDuplicates {

    int FirstOccurrence (int[] array, int n) {

        int beg = 0;
        int end = array.length - 1;

        while (beg <= end) {

            int mid = beg + (end-beg)/2;

            if (array[mid] == n) {
                if (mid-1 >= 0 && array[mid-1] == n) {
                   end = mid-1;
                   continue;
                }
                return mid;
            }

            else if (array[mid] < n)
                beg = mid + 1;
            else
                end = mid - 1;
        }
        return -1;
    }

    int LastOccurrence (int[] array, int n) {

        int beg = 0;
        int end = array.length - 1;

        while (beg <= end) {

            int mid = beg + (end-beg)/2;

            if (array[mid] == n) {
                if (mid+1 < array.length && array[mid+1] == n) {
                   beg = mid + 1;
                   continue;
                }
                return mid;
            }

            else if (array[mid] < n)
                beg = mid + 1;
            else
                end = mid - 1;
        }
        return -1;
    }

    public static void main (String[] args) {

        int[] array = { 1, 2, 3, 9, 9, 9, 9, 10, 10, 12, 13 };
        int first_index, last_index, n;
        n = 9;

        FindDuplicates fd = new FindDuplicates();

        first_index = fd.FirstOccurrence(array, n);
        last_index =  fd.LastOccurrence(array, n);

        if (first_index == -1 || last_index == - 1) {
            System.out.println("Element does not exist");
        } else {
            System.out.println("First Occurrence of " + n + " is at index : " + first_index);
            System.out.println("Last Occurrence of " + n + " is at index : " + last_index);
            System.out.println("Total Count : "+ (last_index - first_index + 1));
        }

        fd = null;
    }
}

Output

First Occurrence of 9 is at index : 3
Last Occurrence of 9 is at index : 6
Total Count : 4



Copyright (c) 2019-2022, Algotree.org.
All rights reserved.