Finding count of duplicate numbers in a sorted array


Algorithm for finding the count of duplicate elements makes use of binary search to find the first occurrence and the last occurrence of the element to be searched.

  • Finding the first occurrence
    If the searched element located at index mid and its previous element (i.e at index mid-1) match, binary search continues in the sorted space to the left side of index mid i.e from index beg till index mid-1. This way the search continues until the previous element is the same as the element to be searched. When the search terminates we get the index of the first occurrence.

  • Finding the last occurrence
    If the searched element located at index mid and its next element (i.e at index mid+1) matches the searched value, the search continues in the sorted space to the right side of mid i.e from index mid+1 till index end. When the search terminates we get the index of the last occurrence.

      Thus, count of duplicate elements = Index of the last occurrence - Index of the first occurrence + 1


Example

Binary_Search_Duplicates

Time complexity : Log ( N ), as we use the binary search algorithm at the core for finding the count of duplicate numbers.

Why is mid calculated as mid = beg + (end-beg)/2 ?


Program for finding the count of duplicate numbers within a sorted array using binary search

def FirstOccurrence ( array, n ) :
    
    beg = 0
    end = len(array) - 1

    while (beg <= end) :

        mid = int (beg + (end-beg)/2)

        if (array[mid] == n) :
            if (mid-1 >= 0 and array[mid-1] == n) :
               end = mid-1
               continue

            return mid 

        elif (array[mid] < n) :
            beg = mid + 1 
        else :
            end = mid - 1 

    return -1  

def LastOccurrence (array, n) :
    
    beg = 0
    end = len(array)-1

    while (beg <= end) :

        mid = int(beg + (end-beg)/2)

        if (array[mid] == n) :
            if (mid+1 < len(array) and array[mid+1] == n) :
               beg = mid + 1 
               continue
            return mid 

        elif (array[mid] < n) :
            beg = mid + 1 
        else :
            end = mid - 1

    return -1; 

array = [ 1, 2, 3, 9, 9, 9, 9, 10, 10, 12, 13 ]
n = int (input("Enter the number : "))

first_index = FirstOccurrence (array, n)
last_index  = LastOccurrence (array, n)

if (first_index == -1 or last_index == - 1) :
    print("Element does not exist")
else :
    print("First occurrence of " + str(n) + " is at index : " + str(first_index))
    print("Last occurrence of " + str(n) + " is at index : " + str(last_index))
    print("Total count : "+ str(last_index - first_index + 1)) 

Output

Enter the number : 9
First occurrence of 9 is at index : 3
Last occurrence of 9 is at index : 6
Total count : 4

Enter the number : 10
First occurrence of 10 is at index : 7
Last occurrence of 10 is at index : 8
Total count : 2

Enter the number : 5
Element does not exist

Enter the number : 12
First occurrence of 12 is at index : 9
Last occurrence of 12 is at index : 9
Total count : 1

#include<iostream>
#include<vector>

using namespace std;

int FirstOccurrence (vector<int> &, int);
int LastOccurrence (vector<int> &, int);

int FirstOccurrence (vector<int> &vec, int n) {

    int beg = 0;
    int end = vec.size() - 1;

    while (beg <= end) {

        int mid = beg + (end-beg)/2;

        if (vec[mid] == n) {
            if (mid-1 >= 0 and vec[mid-1] == n) {
               end = mid-1;
               continue;
            }
            return mid;
        }

        else if (vec[mid] < n)
            beg = mid + 1;
        else
            end = mid - 1;
    }
    return -1;
}

int LastOccurrence (vector<int> &vec, int n) {

    int beg = 0;
    int end = vec.size()-1;

    while (beg <= end) {

        int mid = beg + (end-beg)/2;

        if (vec[mid] == n) {
            if (mid+1 < vec.size() and  vec[mid+1] == n) {
               beg = mid + 1;
               continue;
            }
            return mid;
        }

        else if (vec[mid] < n)
            beg = mid + 1;
        else
            end = mid - 1;
    }
    return -1;
}

int main()
{
   vector<int> vec = { 1, 2, 3, 9, 9, 9, 9, 10, 10, 12, 13 };

   int first_index, last_index, n;
   cout << "Enter the number : ";
   cin >> n;

   first_index = FirstOccurrence (vec, n);
   last_index =  LastOccurrence (vec, n);

   if (first_index == -1 or last_index == - 1) {
       cout << "Element does not exist" << endl;
   } else {
       cout << "First occurrence of " << n << " is at index : " << first_index << endl;
       cout << "Last occurrence of " << n << " is at index : " << last_index << endl;
       cout << "Total count : " << last_index - first_index + 1;
   }
   return 0;
}

Output

Enter the number : 13
First occurrence of 13 is at index : 10
Last occurrence of 13 is at index : 10
Total count : 1

Enter the number : 8
Element does not exist

Enter the number : 5
Element does not exist

Enter the number : 3
First occurrence of 3 is at index : 2
Last occurrence of 3 is at index : 2
Total count : 1

Enter the number : 9
First occurrence of 9 is at index : 3
Last occurrence of 9 is at index : 6
Total count : 4

public class FindDuplicates {

    int FirstOccurrence (int[] array, int n) {

        int beg = 0;
        int end = array.length - 1;

        while (beg <= end) {

            int mid = beg + (end-beg)/2;

            if (array[mid] == n) {
                if (mid-1 >= 0 && array[mid-1] == n) {
                   end = mid-1;
                   continue;
                }
                return mid;
            }

            else if (array[mid] < n)
                beg = mid + 1;
            else
                end = mid - 1;
        }
        return -1;
    }

    int LastOccurrence (int[] array, int n) {

        int beg = 0;
        int end = array.length - 1;

        while (beg <= end) {

            int mid = beg + (end-beg)/2;

            if (array[mid] == n) {
                if (mid+1 < array.length && array[mid+1] == n) {
                   beg = mid + 1;
                   continue;
                }
                return mid;
            }

            else if (array[mid] < n)
                beg = mid + 1;
            else
                end = mid - 1;
        }
        return -1;
    }

    public static void main(String[] args) {

        int[] array = { 1, 2, 3, 9, 9, 9, 9, 10, 10, 12, 13 };
        int first_index, last_index, n;
        n = 9;

        FindDuplicates fd = new FindDuplicates();

        first_index = fd.FirstOccurrence(array, n);
        last_index =  fd.LastOccurrence(array, n);

        if (first_index == -1 || last_index == - 1) {
            System.out.println("Element does not exist");
        } else {
            System.out.println("First Occurrence of " + n + " is at index : " + first_index);
            System.out.println("Last Occurrence of " + n + " is at index : " + last_index);
            System.out.println("Total Count : "+ (last_index - first_index + 1));
        }

        fd = null;
    }
}

Output

First Occurrence of 9 is at index : 3
Last Occurrence of 9 is at index : 6
Total Count : 4



Copyright (c) 2019-2021, Algotree.org.
All rights reserved.