DefaultDownloader.java
/*
* *********************************************************************************************************************
*
* blueMarine II: Semantic Media Centre
* http://tidalwave.it/projects/bluemarine2
*
* Copyright (C) 2015 - 2021 by Tidalwave s.a.s. (http://tidalwave.it)
*
* *********************************************************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
* *********************************************************************************************************************
*
* git clone https://bitbucket.org/tidalwave/bluemarine2-src
* git clone https://github.com/tidalwave-it/bluemarine2-src
*
* *********************************************************************************************************************
*/
package it.tidalwave.bluemarine2.downloader.impl;
import javax.annotation.Nonnull;
import javax.annotation.PostConstruct;
import javax.inject.Inject;
import java.util.List;
import java.util.Date;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
import it.tidalwave.util.annotation.VisibleForTesting;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.ProtocolException;
import org.apache.http.client.RedirectStrategy;
import org.apache.http.client.cache.CacheResponseStatus;
import org.apache.http.client.cache.HttpCacheContext;
import org.apache.http.client.cache.HttpCacheEntry;
import org.apache.http.client.cache.Resource;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.message.BasicHeader;
import org.apache.http.protocol.HttpContext;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.cache.CacheConfig;
import org.apache.http.impl.client.cache.CachingHttpClients;
import org.apache.http.impl.client.cache.HeapResource;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import it.tidalwave.util.NotFoundException;
import it.tidalwave.messagebus.MessageBus;
import it.tidalwave.messagebus.annotation.ListensTo;
import it.tidalwave.messagebus.annotation.SimpleMessageSubscriber;
import it.tidalwave.bluemarine2.message.PowerOnNotification;
import it.tidalwave.bluemarine2.downloader.DownloadComplete;
import it.tidalwave.bluemarine2.downloader.DownloadComplete.Origin;
import it.tidalwave.bluemarine2.downloader.DownloadRequest;
import lombok.Cleanup;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpResponseInterceptor;
import static it.tidalwave.bluemarine2.downloader.DownloaderPropertyNames.CACHE_FOLDER_PATH;
/***********************************************************************************************************************
*
* @author Fabrizio Giudici
*
**********************************************************************************************************************/
@SimpleMessageSubscriber @Slf4j
public class DefaultDownloader
{
@Inject
private MessageBus messageBus;
@Inject
private SimpleHttpCacheStorage cacheStorage;
private PoolingHttpClientConnectionManager connectionManager;
private CacheConfig cacheConfig;
private CloseableHttpClient httpClient;
private final HttpResponseInterceptor killCacheHeaders = (HttpResponse
response, HttpContext context) ->
{
response.removeHeaders("Expires");
response.removeHeaders("Pragma");
response.removeHeaders("Cache-Control");
response.addHeader("Expires", "Mon, 31 Dec 2099 00:00:00 GMT");
};
/*******************************************************************************************************************
*
*
*
******************************************************************************************************************/
// FIXME: this is because there's a fix, and we explicitly save stuff in the cache - see below
private final RedirectStrategy dontFollowRedirect = new RedirectStrategy()
{
@Override
public boolean isRedirected (HttpRequest request, HttpResponse response, HttpContext context)
throws ProtocolException
{
return false;
}
@Override
public HttpUriRequest getRedirect (HttpRequest request, HttpResponse response, HttpContext context)
throws ProtocolException
{
return null;
}
};
/*******************************************************************************************************************
*
*
*
******************************************************************************************************************/
@PostConstruct
@VisibleForTesting void initialize()
{
connectionManager = new PoolingHttpClientConnectionManager();
connectionManager.setMaxTotal(200);
connectionManager.setDefaultMaxPerRoute(20);
cacheConfig = CacheConfig.custom()
.setAllow303Caching(true)
.setMaxCacheEntries(Integer.MAX_VALUE)
.setMaxObjectSize(Integer.MAX_VALUE)
.setSharedCache(false)
.setHeuristicCachingEnabled(true)
.build();
httpClient = CachingHttpClients.custom()
.setHttpCacheStorage(cacheStorage)
.setCacheConfig(cacheConfig)
.setRedirectStrategy(dontFollowRedirect)
.setUserAgent("blueMarine (fabrizio.giudici@tidalwave.it)")
.setDefaultHeaders(List.of(new BasicHeader("Accept", "application/n3")))
.setConnectionManager(connectionManager)
.addInterceptorFirst(killCacheHeaders) // FIXME: only if explicitly configured
.build();
}
/*******************************************************************************************************************
*
*
*
******************************************************************************************************************/
@VisibleForTesting void onPowerOnNotification (@ListensTo @Nonnull final PowerOnNotification notification)
throws NotFoundException
{
log.info("onPowerOnNotification({})", notification);
cacheStorage.setFolderPath(notification.getProperties().get(CACHE_FOLDER_PATH));
}
/*******************************************************************************************************************
*
*
*
******************************************************************************************************************/
@VisibleForTesting void onDownloadRequest (@ListensTo @Nonnull final DownloadRequest request)
throws URISyntaxException
{
try
{
log.info("onDownloadRequest({})", request);
URL url = request.getUrl();
for (;;)
{
final HttpCacheContext context = HttpCacheContext.create();
@Cleanup final CloseableHttpResponse response = httpClient.execute(new HttpGet(url.toURI()), context);
final byte[] bytes = bytesFrom(response);
final CacheResponseStatus cacheResponseStatus = context.getCacheResponseStatus();
log.debug(">>>> cacheResponseStatus: {}", cacheResponseStatus);
final Origin origin = cacheResponseStatus.equals(CacheResponseStatus.CACHE_HIT) ? Origin.CACHE
: Origin.NETWORK;
// FIXME: shouldn't do this by myself
// FIXME: upon configuration, everything should be cached (needed for supporting integration tests)
if (!origin.equals(Origin.CACHE) && List.of(200, 303).contains(response.getStatusLine().getStatusCode()))
{
final Date date = new Date();
final Resource resource = new HeapResource(bytes);
cacheStorage.putEntry(url.toExternalForm(),
new HttpCacheEntry(date, date, response.getStatusLine(), response.getAllHeaders(), resource));
}
// FIXME: if the redirect were enabled, we could drop this check
if (request.isOptionPresent(DownloadRequest.Option.FOLLOW_REDIRECT)
&& response.getStatusLine().getStatusCode() == 303) // SEE_OTHER FIXME
{
url = new URL(response.getFirstHeader("Location").getValue());
log.info(">>>> following 'see also' to {} ...", url);
}
else
{
messageBus.publish(new DownloadComplete(request.getUrl(),
response.getStatusLine().getStatusCode(),
bytes,
origin));
return;
}
}
}
catch (IOException e)
{
log.error("{}: {}", request.getUrl(), e.toString());
messageBus.publish(new DownloadComplete(request.getUrl(), -1, new byte[0], Origin.NETWORK));
}
}
/*******************************************************************************************************************
*
*
*
******************************************************************************************************************/
@Nonnull
private byte[] bytesFrom (@Nonnull final HttpResponse response)
throws IOException
{
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
if (response.getEntity() != null)
{
response.getEntity().writeTo(baos);
}
return baos.toByteArray();
}
}