DefaultDownloader.java
- /*
- * *********************************************************************************************************************
- *
- * blueMarine II: Semantic Media Centre
- * http://tidalwave.it/projects/bluemarine2
- *
- * Copyright (C) 2015 - 2021 by Tidalwave s.a.s. (http://tidalwave.it)
- *
- * *********************************************************************************************************************
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
- * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations under the License.
- *
- * *********************************************************************************************************************
- *
- * git clone https://bitbucket.org/tidalwave/bluemarine2-src
- * git clone https://github.com/tidalwave-it/bluemarine2-src
- *
- * *********************************************************************************************************************
- */
- package it.tidalwave.bluemarine2.downloader.impl;
- import javax.annotation.Nonnull;
- import javax.annotation.PostConstruct;
- import javax.inject.Inject;
- import java.util.List;
- import java.util.Date;
- import java.io.ByteArrayOutputStream;
- import java.io.IOException;
- import java.net.URISyntaxException;
- import java.net.URL;
- import it.tidalwave.util.annotation.VisibleForTesting;
- import org.apache.http.HttpRequest;
- import org.apache.http.HttpResponse;
- import org.apache.http.ProtocolException;
- import org.apache.http.client.RedirectStrategy;
- import org.apache.http.client.cache.CacheResponseStatus;
- import org.apache.http.client.cache.HttpCacheContext;
- import org.apache.http.client.cache.HttpCacheEntry;
- import org.apache.http.client.cache.Resource;
- import org.apache.http.client.methods.CloseableHttpResponse;
- import org.apache.http.client.methods.HttpGet;
- import org.apache.http.client.methods.HttpUriRequest;
- import org.apache.http.message.BasicHeader;
- import org.apache.http.protocol.HttpContext;
- import org.apache.http.impl.client.CloseableHttpClient;
- import org.apache.http.impl.client.cache.CacheConfig;
- import org.apache.http.impl.client.cache.CachingHttpClients;
- import org.apache.http.impl.client.cache.HeapResource;
- import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
- import it.tidalwave.util.NotFoundException;
- import it.tidalwave.messagebus.MessageBus;
- import it.tidalwave.messagebus.annotation.ListensTo;
- import it.tidalwave.messagebus.annotation.SimpleMessageSubscriber;
- import it.tidalwave.bluemarine2.message.PowerOnNotification;
- import it.tidalwave.bluemarine2.downloader.DownloadComplete;
- import it.tidalwave.bluemarine2.downloader.DownloadComplete.Origin;
- import it.tidalwave.bluemarine2.downloader.DownloadRequest;
- import lombok.Cleanup;
- import lombok.extern.slf4j.Slf4j;
- import org.apache.http.HttpResponseInterceptor;
- import static it.tidalwave.bluemarine2.downloader.DownloaderPropertyNames.CACHE_FOLDER_PATH;
- /***********************************************************************************************************************
- *
- * @author Fabrizio Giudici
- *
- **********************************************************************************************************************/
- @SimpleMessageSubscriber @Slf4j
- public class DefaultDownloader
- {
- @Inject
- private MessageBus messageBus;
- @Inject
- private SimpleHttpCacheStorage cacheStorage;
- private PoolingHttpClientConnectionManager connectionManager;
- private CacheConfig cacheConfig;
- private CloseableHttpClient httpClient;
- private final HttpResponseInterceptor killCacheHeaders = (HttpResponse
- response, HttpContext context) ->
- {
- response.removeHeaders("Expires");
- response.removeHeaders("Pragma");
- response.removeHeaders("Cache-Control");
- response.addHeader("Expires", "Mon, 31 Dec 2099 00:00:00 GMT");
- };
- /*******************************************************************************************************************
- *
- *
- *
- ******************************************************************************************************************/
- // FIXME: this is because there's a fix, and we explicitly save stuff in the cache - see below
- private final RedirectStrategy dontFollowRedirect = new RedirectStrategy()
- {
- @Override
- public boolean isRedirected (HttpRequest request, HttpResponse response, HttpContext context)
- throws ProtocolException
- {
- return false;
- }
- @Override
- public HttpUriRequest getRedirect (HttpRequest request, HttpResponse response, HttpContext context)
- throws ProtocolException
- {
- return null;
- }
- };
- /*******************************************************************************************************************
- *
- *
- *
- ******************************************************************************************************************/
- @PostConstruct
- @VisibleForTesting void initialize()
- {
- connectionManager = new PoolingHttpClientConnectionManager();
- connectionManager.setMaxTotal(200);
- connectionManager.setDefaultMaxPerRoute(20);
- cacheConfig = CacheConfig.custom()
- .setAllow303Caching(true)
- .setMaxCacheEntries(Integer.MAX_VALUE)
- .setMaxObjectSize(Integer.MAX_VALUE)
- .setSharedCache(false)
- .setHeuristicCachingEnabled(true)
- .build();
- httpClient = CachingHttpClients.custom()
- .setHttpCacheStorage(cacheStorage)
- .setCacheConfig(cacheConfig)
- .setRedirectStrategy(dontFollowRedirect)
- .setUserAgent("blueMarine (fabrizio.giudici@tidalwave.it)")
- .setDefaultHeaders(List.of(new BasicHeader("Accept", "application/n3")))
- .setConnectionManager(connectionManager)
- .addInterceptorFirst(killCacheHeaders) // FIXME: only if explicitly configured
- .build();
- }
- /*******************************************************************************************************************
- *
- *
- *
- ******************************************************************************************************************/
- @VisibleForTesting void onPowerOnNotification (@ListensTo @Nonnull final PowerOnNotification notification)
- throws NotFoundException
- {
- log.info("onPowerOnNotification({})", notification);
- cacheStorage.setFolderPath(notification.getProperties().get(CACHE_FOLDER_PATH));
- }
- /*******************************************************************************************************************
- *
- *
- *
- ******************************************************************************************************************/
- @VisibleForTesting void onDownloadRequest (@ListensTo @Nonnull final DownloadRequest request)
- throws URISyntaxException
- {
- try
- {
- log.info("onDownloadRequest({})", request);
- URL url = request.getUrl();
- for (;;)
- {
- final HttpCacheContext context = HttpCacheContext.create();
- @Cleanup final CloseableHttpResponse response = httpClient.execute(new HttpGet(url.toURI()), context);
- final byte[] bytes = bytesFrom(response);
- final CacheResponseStatus cacheResponseStatus = context.getCacheResponseStatus();
- log.debug(">>>> cacheResponseStatus: {}", cacheResponseStatus);
- final Origin origin = cacheResponseStatus.equals(CacheResponseStatus.CACHE_HIT) ? Origin.CACHE
- : Origin.NETWORK;
- // FIXME: shouldn't do this by myself
- // FIXME: upon configuration, everything should be cached (needed for supporting integration tests)
- if (!origin.equals(Origin.CACHE) && List.of(200, 303).contains(response.getStatusLine().getStatusCode()))
- {
- final Date date = new Date();
- final Resource resource = new HeapResource(bytes);
- cacheStorage.putEntry(url.toExternalForm(),
- new HttpCacheEntry(date, date, response.getStatusLine(), response.getAllHeaders(), resource));
- }
- // FIXME: if the redirect were enabled, we could drop this check
- if (request.isOptionPresent(DownloadRequest.Option.FOLLOW_REDIRECT)
- && response.getStatusLine().getStatusCode() == 303) // SEE_OTHER FIXME
- {
- url = new URL(response.getFirstHeader("Location").getValue());
- log.info(">>>> following 'see also' to {} ...", url);
- }
- else
- {
- messageBus.publish(new DownloadComplete(request.getUrl(),
- response.getStatusLine().getStatusCode(),
- bytes,
- origin));
- return;
- }
- }
- }
- catch (IOException e)
- {
- log.error("{}: {}", request.getUrl(), e.toString());
- messageBus.publish(new DownloadComplete(request.getUrl(), -1, new byte[0], Origin.NETWORK));
- }
- }
- /*******************************************************************************************************************
- *
- *
- *
- ******************************************************************************************************************/
- @Nonnull
- private byte[] bytesFrom (@Nonnull final HttpResponse response)
- throws IOException
- {
- final ByteArrayOutputStream baos = new ByteArrayOutputStream();
- if (response.getEntity() != null)
- {
- response.getEntity().writeTo(baos);
- }
- return baos.toByteArray();
- }
- }