1   /**
2    * Copyright (c) 2000-2008 Liferay, Inc. All rights reserved.
3    *
4    * Permission is hereby granted, free of charge, to any person obtaining a copy
5    * of this software and associated documentation files (the "Software"), to deal
6    * in the Software without restriction, including without limitation the rights
7    * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8    * copies of the Software, and to permit persons to whom the Software is
9    * furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice shall be included in
12   * all copies or substantial portions of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   * SOFTWARE.
21   */
22  
23  package com.liferay.util;
24  
25  import com.liferay.portal.kernel.util.StringMaker;
26  import com.liferay.portal.kernel.util.StringPool;
27  import com.liferay.portal.kernel.util.StringUtil;
28  
29  /**
30   * <a href="Html.java.html"><b><i>View Source</i></b></a>
31   *
32   * @author Brian Wing Shun Chan
33   * @author Clarence Shen
34   * @author Harry Mark
35   *
36   */
37  public class Html {
38  
39      public static String escape(String text) {
40          if (text == null) {
41              return null;
42          }
43  
44          // Escape using XSS recommendations from
45          // http://www.owasp.org/index.php/Cross_Site_Scripting
46          // #How_to_Protect_Yourself
47  
48          StringMaker sm = new StringMaker(text.length());
49  
50          for (int i = 0; i < text.length(); i++) {
51              char c = text.charAt(i);
52  
53              switch (c) {
54                  case '<':
55                      sm.append("&lt;");
56  
57                      break;
58  
59                  case '>':
60                      sm.append("&gt;");
61  
62                      break;
63  
64                  case '&':
65                      sm.append("&amp;");
66  
67                      break;
68  
69                  case '"':
70                      sm.append("&#034;");
71  
72                      break;
73  
74                  case '\'':
75                      sm.append("&#039;");
76  
77                      break;
78  
79                  case '(':
80                      sm.append("&#040;");
81  
82                      break;
83  
84                  case ')':
85                      sm.append("&#041;");
86  
87                      break;
88  
89                  case '#':
90                      sm.append("&#035;");
91  
92                      break;
93  
94                  case '%':
95                      sm.append("&#037;");
96  
97                      break;
98  
99                  case ';':
100                     sm.append("&#059;");
101 
102                     break;
103 
104                 case '+':
105                     sm.append("&#043;");
106 
107                     break;
108 
109                 case '-':
110                     sm.append("&#045;");
111 
112                     break;
113 
114                 default:
115                     sm.append(c);
116 
117                     break;
118             }
119         }
120 
121         return sm.toString();
122     }
123 
124     public static String fromInputSafe(String text) {
125         return StringUtil.replace(text, "&amp;", "&");
126     }
127 
128     public static String replaceMsWordCharacters(String text) {
129         return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
130     }
131 
132     public static String stripBetween(String text, String tag) {
133         return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
134     }
135 
136     public static String stripComments(String text) {
137         return StringUtil.stripBetween(text, "<!--", "-->");
138     }
139 
140     public static String stripHtml(String text) {
141         if (text == null) {
142             return null;
143         }
144 
145         text = stripComments(text);
146 
147         StringMaker sm = new StringMaker(text.length());
148 
149         int x = 0;
150         int y = text.indexOf("<");
151 
152         while (y != -1) {
153             sm.append(text.substring(x, y));
154             sm.append(StringPool.SPACE);
155 
156             // Look for text enclosed by <script></script>
157 
158             boolean scriptFound = _isScriptTag(text, y + 1);
159 
160             if (scriptFound) {
161                 int pos = y + _TAG_SCRIPT.length;
162 
163                 // Find end of the tag
164 
165                 pos = text.indexOf(">", pos);
166 
167                 if (pos >= 0) {
168 
169                     // Check if preceding character is / (i.e. is this instance
170                     // of <script/>)
171 
172                     if (text.charAt(pos-1) != '/') {
173 
174                         // Search for the ending </script> tag
175 
176                         for (;;) {
177                             pos = text.indexOf("</", pos);
178 
179                             if (pos >= 0) {
180                                 if (_isScriptTag(text, pos + 2)) {
181                                     y = pos;
182 
183                                     break;
184                                 }
185                                 else {
186 
187                                     // Skip past "</"
188 
189                                     pos += 2;
190                                 }
191                             }
192                             else {
193                                 break;
194                             }
195                         }
196                     }
197                 }
198             }
199 
200             x = text.indexOf(">", y);
201 
202             if (x == -1) {
203                 break;
204             }
205 
206             x++;
207 
208             if (x < y) {
209 
210                 // <b>Hello</b
211 
212                 break;
213             }
214 
215             y = text.indexOf("<", x);
216         }
217 
218         if (y == -1) {
219             sm.append(text.substring(x, text.length()));
220         }
221 
222         return sm.toString();
223     }
224 
225     public static String toInputSafe(String text) {
226         return StringUtil.replace(
227             text,
228             new String[] {"&", "\""},
229             new String[] {"&amp;", "&quot;"});
230     }
231 
232     public static String unescape(String text) {
233         if (text == null) {
234             return null;
235         }
236 
237         // Optimize this
238 
239         text = StringUtil.replace(text, "&lt;", "<");
240         text = StringUtil.replace(text, "&gt;", ">");
241         text = StringUtil.replace(text, "&amp;", "&");
242         text = StringUtil.replace(text, "&#034;", "\"");
243         text = StringUtil.replace(text, "&#039;", "'");
244         text = StringUtil.replace(text, "&#040;", "(");
245         text = StringUtil.replace(text, "&#041;", ")");
246         text = StringUtil.replace(text, "&#035;", "#");
247         text = StringUtil.replace(text, "&#037;", "%");
248         text = StringUtil.replace(text, "&#059;", ";");
249         text = StringUtil.replace(text, "&#043;", "+");
250         text = StringUtil.replace(text, "&#045;", "-");
251 
252         return text;
253     }
254 
255     private static boolean _isScriptTag(String text, int start) {
256         char item;
257         int pos = start;
258 
259         if (pos + _TAG_SCRIPT.length + 1 <= text.length()) {
260             for (int i = 0; i < _TAG_SCRIPT.length; i++) {
261                 item = text.charAt(pos++);
262 
263                 if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
264                     return false;
265                 }
266             }
267 
268             item = text.charAt(pos);
269 
270             // Check that char after "script" is not a letter (i.e. another tag)
271 
272             return !Character.isLetter(item);
273         }
274         else {
275             return false;
276         }
277     }
278 
279     private static final String[] _MS_WORD_UNICODE = new String[] {
280         "\u00ae", "\u2019", "\u201c", "\u201d"
281     };
282 
283     private static final String[] _MS_WORD_HTML = new String[] {
284         "&reg;", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
285     };
286 
287     private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
288 
289 }