1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.fileupload2.core;
18  
19  import java.io.IOException;
20  import java.nio.charset.Charset;
21  import java.nio.charset.StandardCharsets;
22  import java.util.ArrayList;
23  import java.util.HashMap;
24  import java.util.List;
25  import java.util.Locale;
26  import java.util.Map;
27  import java.util.Objects;
28  
29  import org.apache.commons.fileupload2.core.FileItemFactory.AbstractFileItemBuilder;
30  import org.apache.commons.io.IOUtils;
31  
32  /**
33   * High level API for processing file uploads.
34   * <p>
35   * This class handles multiple files per single HTML widget, sent using {@code multipart/mixed} encoding type, as specified by
36   * <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a>. Use {@link #parseRequest(RequestContext)} to acquire a list of {@link FileItem}s associated with
37   * a given HTML widget.
38   * </p>
39   * <p>
40   * How the data for individual parts is stored is determined by the factory used to create them; a given part may be in memory, on disk, or somewhere else.
41   * </p>
42   *
43   * @param <R> The request context type.
44   * @param <I> The FileItem type.
45   * @param <F> the FileItemFactory type.
46   */
47  public abstract class AbstractFileUpload<R, I extends FileItem<I>, F extends FileItemFactory<I>> {
48  
49      /**
50       * Boundary parameter key.
51       */
52      private static final String BOUNDARY_KEY = "boundary";
53  
54      /**
55       * Name parameter key.
56       */
57      private static final String NAME_KEY = "name";
58  
59      /**
60       * File name parameter key.
61       */
62      private static final String FILENAME_KEY = "filename";
63  
64      /**
65       * HTTP content type header name.
66       */
67      public static final String CONTENT_TYPE = "Content-type";
68  
69      /**
70       * HTTP content disposition header name.
71       */
72      public static final String CONTENT_DISPOSITION = "Content-disposition";
73  
74      /**
75       * HTTP content length header name.
76       */
77      public static final String CONTENT_LENGTH = "Content-length";
78  
79      /**
80       * Content-disposition value for form data.
81       */
82      public static final String FORM_DATA = "form-data";
83  
84      /**
85       * Content-disposition value for file attachment.
86       */
87      public static final String ATTACHMENT = "attachment";
88  
89      /**
90       * Part of HTTP content type header.
91       */
92      public static final String MULTIPART = "multipart/";
93  
94      /**
95       * HTTP content type header for multipart forms.
96       */
97      public static final String MULTIPART_FORM_DATA = "multipart/form-data";
98  
99      /**
100      * HTTP content type header for multiple uploads.
101      */
102     public static final String MULTIPART_MIXED = "multipart/mixed";
103 
104     /**
105      * Utility method that determines whether the request contains multipart content.
106      * <p>
107      * <strong>NOTE:</strong> This method will be moved to the {@code ServletFileUpload} class after the FileUpload 1.1 release. Unfortunately, since this
108      * method is static, it is not possible to provide its replacement until this method is removed.
109      * </p>
110      *
111      * @param ctx The request context to be evaluated. Must be non-null.
112      * @return {@code true} if the request is multipart; {@code false} otherwise.
113      */
114     public static final boolean isMultipartContent(final RequestContext ctx) {
115         final var contentType = ctx.getContentType();
116         if (contentType == null) {
117             return false;
118         }
119         return contentType.toLowerCase(Locale.ENGLISH).startsWith(MULTIPART);
120     }
121 
122     /**
123      * The maximum size permitted for the complete request, as opposed to {@link #fileSizeMax}. A value of -1 indicates no maximum.
124      */
125     private long sizeMax = -1;
126 
127     /**
128      * The maximum size permitted for a single uploaded file, as opposed to {@link #sizeMax}. A value of -1 indicates no maximum.
129      */
130     private long fileSizeMax = -1;
131 
132     /**
133      * The maximum permitted number of files that may be uploaded in a single request. A value of -1 indicates no maximum.
134      */
135     private long fileCountMax = -1;
136 
137     /**
138      * The content encoding to use when reading part headers.
139      */
140     private Charset headerCharset;
141 
142     /**
143      * The progress listener.
144      */
145     private ProgressListener progressListener = ProgressListener.NOP;
146 
147     /**
148      * The factory to use to create new form items.
149      */
150     private F fileItemFactory;
151 
152     /**
153      * Gets the boundary from the {@code Content-type} header.
154      *
155      * @param contentType The value of the content type header from which to extract the boundary value.
156      * @return The boundary, as a byte array.
157      */
158     public byte[] getBoundary(final String contentType) {
159         final var parser = new ParameterParser();
160         parser.setLowerCaseNames(true);
161         // Parameter parser can handle null input
162         final var params = parser.parse(contentType, new char[] { ';', ',' });
163         final var boundaryStr = params.get(BOUNDARY_KEY);
164         return boundaryStr != null ? boundaryStr.getBytes(StandardCharsets.ISO_8859_1) : null;
165     }
166 
167     /**
168      * Gets the field name from the {@code Content-disposition} header.
169      *
170      * @param headers A {@code Map} containing the HTTP request headers.
171      * @return The field name for the current {@code encapsulation}.
172      */
173     public String getFieldName(final FileItemHeaders headers) {
174         return getFieldName(headers.getHeader(CONTENT_DISPOSITION));
175     }
176 
177     /**
178      * Gets the field name, which is given by the content-disposition header.
179      *
180      * @param contentDisposition The content-dispositions header value.
181      * @return The field name.
182      */
183     private String getFieldName(final String contentDisposition) {
184         String fieldName = null;
185         if (contentDisposition != null && contentDisposition.toLowerCase(Locale.ENGLISH).startsWith(FORM_DATA)) {
186             final var parser = new ParameterParser();
187             parser.setLowerCaseNames(true);
188             // Parameter parser can handle null input
189             final var params = parser.parse(contentDisposition, ';');
190             fieldName = params.get(NAME_KEY);
191             if (fieldName != null) {
192                 fieldName = fieldName.trim();
193             }
194         }
195         return fieldName;
196     }
197 
198     /**
199      * Gets the maximum number of files allowed in a single request.
200      *
201      * @return The maximum number of files allowed in a single request.
202      */
203     public long getFileCountMax() {
204         return fileCountMax;
205     }
206 
207     /**
208      * Gets the factory class used when creating file items.
209      *
210      * @return The factory class for new file items.
211      */
212     public F getFileItemFactory() {
213         return fileItemFactory;
214     }
215 
216     /**
217      * Gets the file name from the {@code Content-disposition} header.
218      *
219      * @param headers The HTTP headers object.
220      *
221      * @return The file name for the current {@code encapsulation}.
222      */
223     public String getFileName(final FileItemHeaders headers) {
224         return getFileName(headers.getHeader(CONTENT_DISPOSITION));
225     }
226 
227     /**
228      * Gets the given content-disposition headers file name.
229      *
230      * @param contentDisposition The content-disposition headers value.
231      * @return The file name
232      */
233     private String getFileName(final String contentDisposition) {
234         String fileName = null;
235         if (contentDisposition != null) {
236             final var cdl = contentDisposition.toLowerCase(Locale.ENGLISH);
237             if (cdl.startsWith(FORM_DATA) || cdl.startsWith(ATTACHMENT)) {
238                 final var parser = new ParameterParser();
239                 parser.setLowerCaseNames(true);
240                 // Parameter parser can handle null input
241                 final var params = parser.parse(contentDisposition, ';');
242                 if (params.containsKey(FILENAME_KEY)) {
243                     fileName = params.get(FILENAME_KEY);
244                     if (fileName != null) {
245                         fileName = fileName.trim();
246                     } else {
247                         // Even if there is no value, the parameter is present,
248                         // so we return an empty file name rather than no file
249                         // name.
250                         fileName = "";
251                     }
252                 }
253             }
254         }
255         return fileName;
256     }
257 
258     /**
259      * Gets the maximum allowed size of a single uploaded file, as opposed to {@link #getSizeMax()}.
260      *
261      * @see #setFileSizeMax(long)
262      * @return Maximum size of a single uploaded file.
263      */
264     public long getFileSizeMax() {
265         return fileSizeMax;
266     }
267 
268     /**
269      * Gets the character encoding used when reading the headers of an individual part. When not specified, or {@code null}, the request encoding is used. If
270      * that is also not specified, or {@code null}, the platform default encoding is used.
271      *
272      * @return The encoding used to read part headers.
273      */
274     public Charset getHeaderCharset() {
275         return headerCharset;
276     }
277 
278     /**
279      * Gets a file item iterator.
280      *
281      * @param request The servlet request to be parsed.
282      * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted.
283      * @throws FileUploadException if there are problems reading/parsing the request or storing files.
284      * @throws IOException         An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the
285      *                             uploaded content.
286      */
287     public abstract FileItemInputIterator getItemIterator(R request) throws FileUploadException, IOException;
288 
289     /**
290      * Gets an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
291      *
292      * @param requestContext The context for the request to be parsed.
293      * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted.
294      * @throws FileUploadException if there are problems reading/parsing the request or storing files.
295      * @throws IOException         An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the
296      *                             uploaded content.
297      */
298     public FileItemInputIterator getItemIterator(final RequestContext requestContext) throws FileUploadException, IOException {
299         return new FileItemInputIteratorImpl(this, requestContext);
300     }
301 
302     /**
303      * Parses the {@code header-part} and returns as key/value pairs.
304      * <p>
305      * If there are multiple headers of the same names, the name will map to a comma-separated list containing the values.
306      * </p>
307      *
308      * @param headerPart The {@code header-part} of the current {@code encapsulation}.
309      * @return A {@code Map} containing the parsed HTTP request headers.
310      */
311     public FileItemHeaders getParsedHeaders(final String headerPart) {
312         final var len = headerPart.length();
313         final var headers = newFileItemHeaders();
314         var start = 0;
315         for (;;) {
316             var end = parseEndOfLine(headerPart, start);
317             if (start == end) {
318                 break;
319             }
320             final var header = new StringBuilder(headerPart.substring(start, end));
321             start = end + 2;
322             while (start < len) {
323                 var nonWs = start;
324                 while (nonWs < len) {
325                     final var c = headerPart.charAt(nonWs);
326                     if (c != ' ' && c != '\t') {
327                         break;
328                     }
329                     ++nonWs;
330                 }
331                 if (nonWs == start) {
332                     break;
333                 }
334                 // Continuation line found
335                 end = parseEndOfLine(headerPart, nonWs);
336                 header.append(' ').append(headerPart, nonWs, end);
337                 start = end + 2;
338             }
339             parseHeaderLine(headers, header.toString());
340         }
341         return headers;
342     }
343 
344     /**
345      * Gets the progress listener.
346      *
347      * @return The progress listener, if any, or null.
348      */
349     public ProgressListener getProgressListener() {
350         return progressListener;
351     }
352 
353     /**
354      * Gets the maximum allowed size of a complete request, as opposed to {@link #getFileSizeMax()}.
355      *
356      * @return The maximum allowed size, in bytes. The default value of -1 indicates, that there is no limit.
357      * @see #setSizeMax(long)
358      *
359      */
360     public long getSizeMax() {
361         return sizeMax;
362     }
363 
364     /**
365      * Creates a new instance of {@link FileItemHeaders}.
366      *
367      * @return The new instance.
368      */
369     protected FileItemHeaders newFileItemHeaders() {
370         return AbstractFileItemBuilder.newFileItemHeaders();
371     }
372 
373     /**
374      * Skips bytes until the end of the current line.
375      *
376      * @param headerPart The headers, which are being parsed.
377      * @param end        Index of the last byte, which has yet been processed.
378      * @return Index of the \r\n sequence, which indicates end of line.
379      */
380     private int parseEndOfLine(final String headerPart, final int end) {
381         var index = end;
382         for (;;) {
383             final var offset = headerPart.indexOf('\r', index);
384             if (offset == -1 || offset + 1 >= headerPart.length()) {
385                 throw new IllegalStateException("Expected headers to be terminated by an empty line.");
386             }
387             if (headerPart.charAt(offset + 1) == '\n') {
388                 return offset;
389             }
390             index = offset + 1;
391         }
392     }
393 
394     /**
395      * Parses the next header line.
396      *
397      * @param headers String with all headers.
398      * @param header  Map where to store the current header.
399      */
400     private void parseHeaderLine(final FileItemHeaders headers, final String header) {
401         final var colonOffset = header.indexOf(':');
402         if (colonOffset == -1) {
403             // This header line is malformed, skip it.
404             return;
405         }
406         final var headerName = header.substring(0, colonOffset).trim();
407         final var headerValue = header.substring(colonOffset + 1).trim();
408         headers.addHeader(headerName, headerValue);
409     }
410 
411     /**
412      * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
413      *
414      * @param request The servlet request to be parsed.
415      * @return A map of {@code FileItem} instances parsed from the request.
416      * @throws FileUploadException if there are problems reading/parsing the request or storing files.
417      */
418     public abstract Map<String, List<I>> parseParameterMap(R request) throws FileUploadException;
419 
420     /**
421      * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
422      *
423      * @param ctx The context for the request to be parsed.
424      * @return A map of {@code FileItem} instances parsed from the request.
425      * @throws FileUploadException if there are problems reading/parsing the request or storing files.
426      */
427     public Map<String, List<I>> parseParameterMap(final RequestContext ctx) throws FileUploadException {
428         final var items = parseRequest(ctx);
429         final Map<String, List<I>> itemsMap = new HashMap<>(items.size());
430 
431         for (final I fileItem : items) {
432             final var fieldName = fileItem.getFieldName();
433             final var mappedItems = itemsMap.computeIfAbsent(fieldName, k -> new ArrayList<>());
434             mappedItems.add(fileItem);
435         }
436 
437         return itemsMap;
438     }
439 
440     /**
441      * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
442      *
443      * @param request The servlet request to be parsed.
444      * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted.
445      * @throws FileUploadException if there are problems reading/parsing the request or storing files.
446      */
447     public abstract List<I> parseRequest(R request) throws FileUploadException;
448 
449     /**
450      * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
451      *
452      * @param requestContext The context for the request to be parsed.
453      * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted.
454      * @throws FileUploadException if there are problems reading/parsing the request or storing files.
455      */
456     public List<I> parseRequest(final RequestContext requestContext) throws FileUploadException {
457         final List<I> itemList = new ArrayList<>();
458         var successful = false;
459         try {
460             final var fileItemFactory = Objects.requireNonNull(getFileItemFactory(), "No FileItemFactory has been set.");
461             final var buffer = new byte[IOUtils.DEFAULT_BUFFER_SIZE];
462             getItemIterator(requestContext).forEachRemaining(fileItemInput -> {
463                 if (itemList.size() == fileCountMax) {
464                     // The next item will exceed the limit.
465                     throw new FileUploadFileCountLimitException(ATTACHMENT, getFileCountMax(), itemList.size());
466                 }
467                 // Don't use getName() here to prevent an InvalidFileNameException.
468                 // @formatter:off
469                 final var fileItem = fileItemFactory.fileItemBuilder()
470                     .setFieldName(fileItemInput.getFieldName())
471                     .setContentType(fileItemInput.getContentType())
472                     .setFormField(fileItemInput.isFormField())
473                     .setFileName(fileItemInput.getName())
474                     .setFileItemHeaders(fileItemInput.getHeaders())
475                     .get();
476                 // @formatter:on
477                 itemList.add(fileItem);
478                 try (var inputStream = fileItemInput.getInputStream();
479                         var outputStream = fileItem.getOutputStream()) {
480                     IOUtils.copyLarge(inputStream, outputStream, buffer);
481                 } catch (final FileUploadException e) {
482                     throw e;
483                 } catch (final IOException e) {
484                     throw new FileUploadException(String.format("Processing of %s request failed. %s", MULTIPART_FORM_DATA, e.getMessage()), e);
485                 }
486             });
487             successful = true;
488             return itemList;
489         } catch (final FileUploadException e) {
490             throw e;
491         } catch (final IOException e) {
492             throw new FileUploadException(e.getMessage(), e);
493         } finally {
494             if (!successful) {
495                 for (final I fileItem : itemList) {
496                     try {
497                         fileItem.delete();
498                     } catch (final Exception ignored) {
499                         // ignored TODO perhaps add to tracker delete failure list somehow?
500                     }
501                 }
502             }
503         }
504     }
505 
506     /**
507      * Sets the maximum number of files allowed per request.
508      *
509      * @param fileCountMax The new limit. {@code -1} means no limit.
510      */
511     public void setFileCountMax(final long fileCountMax) {
512         this.fileCountMax = fileCountMax;
513     }
514 
515     /**
516      * Sets the factory class to use when creating file items.
517      *
518      * @param factory The factory class for new file items.
519      */
520     public void setFileItemFactory(final F factory) {
521         this.fileItemFactory = factory;
522     }
523 
524     /**
525      * Sets the maximum allowed size of a single uploaded file, as opposed to {@link #getSizeMax()}.
526      *
527      * @see #getFileSizeMax()
528      * @param fileSizeMax Maximum size of a single uploaded file.
529      */
530     public void setFileSizeMax(final long fileSizeMax) {
531         this.fileSizeMax = fileSizeMax;
532     }
533 
534     /**
535      * Specifies the character encoding to be used when reading the headers of individual part. When not specified, or {@code null}, the request encoding is
536      * used. If that is also not specified, or {@code null}, the platform default encoding is used.
537      *
538      * @param headerCharset The encoding used to read part headers.
539      */
540     public void setHeaderCharset(final Charset headerCharset) {
541         this.headerCharset = headerCharset;
542     }
543 
544     /**
545      * Sets the progress listener.
546      *
547      * @param progressListener The progress listener, if any. Defaults to null.
548      */
549     public void setProgressListener(final ProgressListener progressListener) {
550         this.progressListener = progressListener != null ? progressListener : ProgressListener.NOP;
551     }
552 
553     /**
554      * Sets the maximum allowed size of a complete request, as opposed to {@link #setFileSizeMax(long)}.
555      *
556      * @param sizeMax The maximum allowed size, in bytes. The default value of -1 indicates, that there is no limit.
557      * @see #getSizeMax()
558      */
559     public void setSizeMax(final long sizeMax) {
560         this.sizeMax = sizeMax;
561     }
562 
563 }