diff --git a/doc/specs/stdlib_strings.md b/doc/specs/stdlib_strings.md index d5f22519c..087589bbc 100644 --- a/doc/specs/stdlib_strings.md +++ b/doc/specs/stdlib_strings.md @@ -280,7 +280,7 @@ end program demo_slice Returns the starting index of the `occurrence`th occurrence of the substring `pattern` in the input string `string`. Default value of `occurrence` is set to `1`. -If `consider_overlapping` is not provided or is set to `.true.` the function counts two overlapping occurrences of substring as two different occurrences. +If `consider_overlapping` is not provided or is set to `.true.` the function counts two overlapping occurrences of substring `pattern` as two different occurrences. If `occurrence`th occurrence is not found, function returns `0`. #### Syntax @@ -308,7 +308,7 @@ Elemental function #### Result value -The result is a scalar of integer type or integer array of rank equal to the highest rank among all dummy arguments. +The result is a scalar of integer type or an integer array of rank equal to the highest rank among all dummy arguments. #### Example @@ -381,4 +381,56 @@ program demo_replace_all ! string <-- "technology here, technology there, technology everywhere" end program demo_replace_all -``` \ No newline at end of file +``` + + + +### `count` + +#### Description + +Returns the number of times the substring `pattern` has occurred in the input string `string`. +If `consider_overlapping` is not provided or is set to `.true.` the function counts two overlapping occurrences of substring `pattern` as two different occurrences. + +#### Syntax + +`string = [[stdlib_strings(module):count(interface)]] (string, pattern [, consider_overlapping])` + +#### Status + +Experimental + +#### Class + +Elemental function + +#### Argument + +- `string`: Character scalar or [[stdlib_string_type(module):string_type(type)]]. + This argument is intent(in). +- `pattern`: Character scalar or [[stdlib_string_type(module):string_type(type)]]. + This argument is intent(in). +- `consider_overlapping`: logical. + This argument is intent(in) and optional. + +#### Result value + +The result is a scalar of integer type or an integer array of rank equal to the highest rank among all dummy arguments. + +#### Example + +```fortran +program demo_count + use stdlib_string_type, only: string_type, assignment(=) + use stdlib_strings, only : count + implicit none + type(string_type) :: string + + string = "How much wood would a woodchuck chuck if a woodchuck could chuck wood?" + + print *, count(string, "wood") ! 4 + print *, count(string, ["would", "chuck", "could"]) ! [1, 4, 1] + print *, count("a long queueueueue", "ueu", [.false., .true.]) ! [2, 4] + +end program demo_count +``` diff --git a/src/stdlib_strings.f90 b/src/stdlib_strings.f90 index 6bfd8a630..2554dbfb1 100644 --- a/src/stdlib_strings.f90 +++ b/src/stdlib_strings.f90 @@ -12,7 +12,7 @@ module stdlib_strings public :: strip, chomp public :: starts_with, ends_with - public :: slice, find, replace_all + public :: slice, find, replace_all, count !> Remove leading and trailing whitespace characters. @@ -93,6 +93,18 @@ module stdlib_strings module procedure :: replace_all_char_char_char end interface replace_all + !> Version: experimental + !> + !> Returns the number of times substring 'pattern' has appeared in the + !> input string 'string' + !> [Specifications](../page/specs/stdlib_strings.html#count) + interface count + module procedure :: count_string_string + module procedure :: count_string_char + module procedure :: count_char_string + module procedure :: count_char_char + end interface count + contains @@ -443,9 +455,7 @@ elemental function find_char_char(string, pattern, occurrence, consider_overlapp logical, intent(in), optional :: consider_overlapping integer :: lps_array(len(pattern)) integer :: res, s_i, p_i, length_string, length_pattern, occurrence_ - logical :: consider_overlapping_ - consider_overlapping_ = optval(consider_overlapping, .true.) occurrence_ = optval(occurrence, 1) res = 0 length_string = len(string) @@ -464,7 +474,7 @@ elemental function find_char_char(string, pattern, occurrence, consider_overlapp if (occurrence_ == 0) then res = s_i - length_pattern + 1 exit - else if (consider_overlapping_) then + else if (optval(consider_overlapping, .true.)) then p_i = lps_array(p_i) else p_i = 0 @@ -649,4 +659,85 @@ pure function replace_all_char_char_char(string, pattern, replacement) result(re end function replace_all_char_char_char + !> Returns the number of times substring 'pattern' has appeared in the + !> input string 'string' + !> Returns an integer + elemental function count_string_string(string, pattern, consider_overlapping) result(res) + type(string_type), intent(in) :: string + type(string_type), intent(in) :: pattern + logical, intent(in), optional :: consider_overlapping + integer :: res + + res = count(char(string), char(pattern), consider_overlapping) + + end function count_string_string + + !> Returns the number of times substring 'pattern' has appeared in the + !> input string 'string' + !> Returns an integer + elemental function count_string_char(string, pattern, consider_overlapping) result(res) + type(string_type), intent(in) :: string + character(len=*), intent(in) :: pattern + logical, intent(in), optional :: consider_overlapping + integer :: res + + res = count(char(string), pattern, consider_overlapping) + + end function count_string_char + + !> Returns the number of times substring 'pattern' has appeared in the + !> input string 'string' + !> Returns an integer + elemental function count_char_string(string, pattern, consider_overlapping) result(res) + character(len=*), intent(in) :: string + type(string_type), intent(in) :: pattern + logical, intent(in), optional :: consider_overlapping + integer :: res + + res = count(string, char(pattern), consider_overlapping) + + end function count_char_string + + !> Returns the number of times substring 'pattern' has appeared in the + !> input string 'string' + !> Returns an integer + elemental function count_char_char(string, pattern, consider_overlapping) result(res) + character(len=*), intent(in) :: string + character(len=*), intent(in) :: pattern + logical, intent(in), optional :: consider_overlapping + integer :: lps_array(len(pattern)) + integer :: res, s_i, p_i, length_string, length_pattern + + res = 0 + length_string = len(string) + length_pattern = len(pattern) + + if (length_pattern > 0 .and. length_pattern <= length_string) then + lps_array = compute_lps(pattern) + + s_i = 1 + p_i = 1 + do while (s_i <= length_string) + if (string(s_i:s_i) == pattern(p_i:p_i)) then + if (p_i == length_pattern) then + res = res + 1 + if (optval(consider_overlapping, .true.)) then + p_i = lps_array(p_i) + else + p_i = 0 + end if + end if + s_i = s_i + 1 + p_i = p_i + 1 + else if (p_i > 1) then + p_i = lps_array(p_i - 1) + 1 + else + s_i = s_i + 1 + end if + end do + end if + + end function count_char_char + + end module stdlib_strings diff --git a/src/tests/string/test_string_functions.f90 b/src/tests/string/test_string_functions.f90 index d0afb745a..ca44d956d 100644 --- a/src/tests/string/test_string_functions.f90 +++ b/src/tests/string/test_string_functions.f90 @@ -4,7 +4,7 @@ module test_string_functions use stdlib_error, only : check use stdlib_string_type, only : string_type, assignment(=), operator(==), & to_lower, to_upper, to_title, to_sentence, reverse - use stdlib_strings, only: slice, find, replace_all + use stdlib_strings, only: slice, find, replace_all, count use stdlib_optval, only: optval use stdlib_ascii, only : to_string implicit none @@ -355,8 +355,8 @@ subroutine test_replace_all call check(replace_all(test_string_1, "TAT", "ATA") == & & "mutate DNA sequence: GTATACGATAGCCGTAATATA", & & "replace_all: 1 string_type & 2 character scalar, test case 1") - call check(replace_all("mutate DNA sequence: AGAGAGCCTAGAGAGAG", test_pattern_2, & - & "GC") == "mutate DNA sequence: GCGAGCCTGCGGCG", & + call check(replace_all("mutate DNA sequence: AGAGAGCCTAGAGAGAG", test_pattern_2, "GC") == & + & "mutate DNA sequence: GCGAGCCTGCGGCG", & & "replace_all: 1 string_type & 2 character scalar, test case 2") call check(replace_all("mutate DNA sequence: GTTATCGTATGCCGTAATTAT", "TA", & & test_replacement_2) == "mutate DNA sequence: GTagaTCGagaTGCCGagaATagaT", & @@ -378,6 +378,49 @@ subroutine test_replace_all end subroutine test_replace_all + subroutine test_count + type(string_type) :: test_string_1, test_string_2, test_pattern_1, test_pattern_2 + test_string_1 = "DNA sequence: AGAGAGAGTCCTGTCGAGA" + test_string_2 = "DNA sequence: GTCCTGTCCTGTCAGA" + test_pattern_1 = "AGA" + test_pattern_2 = "GTCCTGTC" + + ! all 2 as string_type + call check(all(count([test_string_1, test_string_2], test_pattern_1) == [4, 1]), & + & 'count: all 2 as string_type, test case 1') + call check(all(count(test_string_1, [test_pattern_1, test_pattern_2], .false.) == [3, 1]), & + & 'count: all 2 as string_type, test case 2') + call check(count(test_string_2, test_pattern_1, .false.) == 1, & + & 'count: all 2 as string_type, test case 3') + call check(all(count([test_string_2, test_string_2, test_string_1], & + & [test_pattern_2, test_pattern_2, test_pattern_1], [.true., .false., .false.]) == & + & [2, 1, 3]), 'count: all 2 as string_type, test case 4') + call check(all(count([[test_string_1, test_string_2], [test_string_1, test_string_2]], & + & [[test_pattern_1, test_pattern_2], [test_pattern_2, test_pattern_1]], .true.) == & + & [[4, 2], [1, 1]]), 'count: all 2 as string_type, test case 5') + + ! 1 string_type and 1 character scalar + call check(all(count(test_string_1, ["AGA", "GTC"], [.true., .false.]) == [4, 2]), & + & 'count: 1 string_type and 1 character scalar, test case 1') + call check(all(count([test_string_1, test_string_2], ["CTC", "GTC"], [.true., .false.]) == & + & [0, 3]), 'count: 1 string_type and 1 character scalar, test case 2') + call check(all(count(["AGAGAGAGTCCTGTCGAGA", "AGAGAGAGTCCTGTCGAGA"], & + & test_pattern_1, [.false., .true.]) == [3, 4]), & + & 'count: 1 string_type and 1 character scalar, test case 3') + call check(count(test_string_1, "GAG") == 4, & + & 'count: 1 string_type and 1 character scalar, test case 4') + call check(count("DNA sequence: GTCCTGTCCTGTCAGA", test_pattern_2, .false.) == 1, & + & 'count: 1 string_type and 1 character scalar, test case 5') + + ! all 2 character scalar + call check(all(count("", ["mango", "trees"], .true.) == [0, 0]), & + & 'count: all 2 character scalar, test case 1') + call check(count("", "", .true.) == 0, 'count: all 2 character scalar, test case 2') + call check(all(count(["mango", "trees"], "", .true.) == [0, 0]), & + & 'count: all 2 character scalar, test case 3') + + end subroutine test_count + end module test_string_functions @@ -394,5 +437,6 @@ program tester call test_slice_gen call test_find call test_replace_all + call test_count end program tester