libSUFR
a LIBrary of Some Useful Fortran Routines
All Classes Namespaces Files Functions Variables Pages
text_html.f90
Go to the documentation of this file.
1!> \file text_html.f90 Procedures to manipulate text strings containing HTML code
2
3
4! Copyright (c) 2002-2025 Marc van der Sluys - Nikhef/Utrecht University - marc.vandersluys.nl
5!
6! This file is part of the libSUFR package,
7! see: http://libsufr.sourceforge.net/
8!
9! This is free software: you can redistribute it and/or modify it under the terms of the European Union
10! Public Licence 1.2 (EUPL 1.2). This software is distributed in the hope that it will be useful, but
11! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
12! PURPOSE. See the EU Public Licence for more details. You should have received a copy of the European
13! Union Public Licence along with this code. If not, see <https://www.eupl.eu/1.2/en/>.
14!
15!
16
17
18
19
20!***********************************************************************************************************************************
21!> \brief Procedures to manipulate text strings containing HTML code
22
module sufr_text_html
  ! Module-wide defaults: no implicit typing; module entities keep their values between calls.
  implicit none
  save

contains
28
29
30 !*********************************************************************************************************************************
31 !> \brief Remove HTML code from a string using all remove_html_* subroutines available in libSUFR
32 !!
33 !! \param str String to remove HTML code from
34 !! \param debug Print debug info (T/F, optional)
35
subroutine remove_html(str, debug)
  ! Strip HTML markup from str in place by running every remove_html_* routine of this module in turn:
  ! links, images, bold/italics/sub/superscript tags, and finally <br>/<p>/<hr> plus a few entities.
  !
  ! str:    string to clean (modified in place)
  ! debug:  print debug info in the routines that support it (optional)
  implicit none
  character(len=*), intent(inout) :: str
  logical, intent(in), optional :: debug

  ! An absent optional argument may be passed on to another optional dummy argument,
  ! so debug is forwarded as is; the callees default to no debug output.
  call remove_html_links(str, debug)
  call remove_html_images(str, debug)
  call remove_html_bold_italics(str)
  call remove_html_br_p_hr(str)

end subroutine remove_html
52 !*********************************************************************************************************************************
53
54
55 !*********************************************************************************************************************************
56 !> \brief Remove HTML links from a string (but keep the text in the link)
57 !!
58 !! \param str String to remove HTML code from
59 !! \param debug Print debug info (T/F, optional)
60
subroutine remove_html_links(str, debug)
  ! Remove every opening '<a ...>' tag and every closing '</a>' tag from str, keeping the link text.
  !
  ! str:    string to clean (modified in place)
  ! debug:  print the positions and the retained pieces for each removed tag (optional)
  use sufr_text, only: remove_substring

  implicit none
  character(len=*), intent(inout) :: str
  logical, intent(in), optional :: debug

  integer :: l, i1, i2
  character(len=len(str)) :: tstr
  logical :: print_debug_info

  print_debug_info = .false.
  if(present(debug)) print_debug_info = debug

  ! Remove '<a ...>':
  do  ! There may be multiple instances
     l = len_trim(str)

     i1 = index(str, '<a ')                  ! Start of the first remaining opening tag
     if(i1.eq.0) exit                        ! No (more) opening tags

     i2 = index(str(i1:l), '>')              ! Position of the closing '>' relative to i1
     ! An unterminated tag cannot be removed.  Since no '>' follows it, no complete tag can follow
     ! either, so stop here - continuing would loop forever on the unchanged string.
     if(i2.eq.0) exit

     tstr = str(1:i1-1)//str(i1+i2:l)        ! Everything before '<' plus everything after '>'
     if(print_debug_info) then
        print*,i1,i2,i1+i2,l
        print*,str(1:i1-1)
        print*,str(i1+i2:l)
     end if
     str = tstr
  end do

  ! Remove '</a>':
  call remove_substring(str, '</a>')

end subroutine remove_html_links
100 !*********************************************************************************************************************************
101
102
103
104 !*********************************************************************************************************************************
105 !> \brief Remove code for HTML images from a string
106 !!
107 !! \param str String to remove HTML code from
108 !! \param debug Print debug info (T/F, optional)
109
subroutine remove_html_images(str, debug)
  ! Remove every '<img ...>' tag from str.
  !
  ! str:    string to clean (modified in place)
  ! debug:  print the positions and the retained pieces for each removed tag (optional)
  implicit none
  character(len=*), intent(inout) :: str
  logical, intent(in), optional :: debug

  integer :: l, i1, i2
  character(len=len(str)) :: tstr
  logical :: print_debug_info

  print_debug_info = .false.
  if(present(debug)) print_debug_info = debug

  ! Remove '<img ...>'
  do  ! There may be multiple instances
     l = len_trim(str)

     i1 = index(str, '<img ')                ! Start of the first remaining image tag
     if(i1.eq.0) exit                        ! No (more) image tags

     i2 = index(str(i1:l), '>')              ! Position of the closing '>' relative to i1
     ! An unterminated tag cannot be removed.  Since no '>' follows it, no complete tag can follow
     ! either, so stop here - continuing would loop forever on the unchanged string.
     if(i2.eq.0) exit

     tstr = str(1:i1-1)//str(i1+i2:l)        ! Everything before '<' plus everything after '>'
     if(print_debug_info) then
        print*,i1,i2,i1+i2,l
        print*,str(1:i1-1)
        print*,str(i1+i2:l)
     end if
     str = tstr
  end do

end subroutine remove_html_images
144 !*********************************************************************************************************************************
145
146
147
148 !*********************************************************************************************************************************
149 !> \brief Remove HTML bold, italics, subscript and superscript tags from a string
150 !!
151 !! \param str String to remove HTML code from
152
subroutine remove_html_bold_italics(str)
  ! Remove the opening and closing HTML tags for bold, italics, subscript and superscript from str.
  ! Only the tags are removed; the text between them is not touched by this routine.
  !
  ! str:  string to clean (modified in place)
  use sufr_text, only: remove_substring

  implicit none
  character(len=*), intent(inout) :: str

  ! Bold:
  call remove_substring(str, '<b>')
  call remove_substring(str, '</b>')

  ! Italics:
  call remove_substring(str, '<i>')
  call remove_substring(str, '</i>')

  ! Subscript:
  call remove_substring(str, '<sub>')
  call remove_substring(str, '</sub>')

  ! Superscript:
  call remove_substring(str, '<sup>')
  call remove_substring(str, '</sup>')

end subroutine remove_html_bold_italics
169 !*********************************************************************************************************************************
170
171
172
173 !*********************************************************************************************************************************
174 !> \brief Replace HTML <br>, <p>, </p>, <hr> and &nbsp; in a string by a space, and &ndash; and &mdash; by a hyphen
175 !!
176 !! \param str String to remove HTML code from
177
subroutine remove_html_br_p_hr(str)
  ! Replace HTML line/paragraph/rule tags and a few character entities in str by plain characters.
  !
  ! str:  string to clean (modified in place)
  use sufr_text, only: replace_substring

  implicit none
  character(len=*), intent(inout) :: str

  ! Structural tags become a single space, so that the words around them stay separated:
  call replace_substring(str, '<br>', ' ')
  call replace_substring(str, '<p>', ' ')
  call replace_substring(str, '</p>', ' ')
  call replace_substring(str, '<hr>', ' ')

  ! Character entities: non-breaking space -> space, en/em dash -> hyphen:
  call replace_substring(str, '&nbsp;', ' ')
  call replace_substring(str, '&ndash;', '-')
  call replace_substring(str, '&mdash;', '-')

end subroutine remove_html_br_p_hr
194 !*********************************************************************************************************************************
195
196
197
198end module sufr_text_html
199!***********************************************************************************************************************************
200
Procedures to manipulate text strings containing HTML code.
Definition text_html.f90:23
subroutine remove_html_links(str, debug)
Remove HTML links from a string (but keep the text in the link)
Definition text_html.f90:62
subroutine remove_html(str, debug)
Remove HTML code from a string using all remove_html_* subroutines available in libSUFR.
Definition text_html.f90:37
subroutine remove_html_bold_italics(str)
Remove HTML bold and italics from a string.
subroutine remove_html_images(str, debug)
Remove code for HTML images from a string.
subroutine remove_html_br_p_hr(str)
Remove HTML <br>, <p>...</p>, <hr> from a string - replace them by a space.
Procedures to manipulate text/strings.
Definition text.f90:21
pure subroutine replace_substring(string, str_srch, str_repl)
Search and replace occurrences of a substring in a string as often as the search string is found.
Definition text.f90:144
subroutine remove_substring(string, substr, debug)
Remove a substring from a string, if present.
Definition text.f90:222