My changes for Windows
These are my changes for building Caffe2 on Windows (I have Windows 10 and Visual Studio 2015). Sorry for the strange formatting; it is only a diff log, since I don't know how to attach a file.
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index 3a7688c7..1469300e 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -218,6 +218,9 @@ if (BUILD_PYTHON)
elseif (MSVC)
set_target_properties(caffe2_pybind11_state_gpu PROPERTIES SUFFIX ".pyd")
endif()
+ if (MSVC)
+ set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} /FORCE:MULTIPLE")
+ endif()
set_target_properties(
caffe2_pybind11_state_gpu PROPERTIES LIBRARY_OUTPUT_DIRECTORY
${CMAKE_BINARY_DIR}/caffe2/python)
diff --git a/caffe2/binaries/CMakeLists.txt b/caffe2/binaries/CMakeLists.txt
index 3ec61dbd..b9170d92 100644
--- a/caffe2/binaries/CMakeLists.txt
+++ b/caffe2/binaries/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(MSVC)
+ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /FORCE:MULTIPLE")
+endif()
+
caffe2_binary_target("convert_caffe_image_db.cc")
caffe2_binary_target("convert_db.cc")
caffe2_binary_target("db_throughput.cc")
diff --git a/caffe2/binaries/make_image_db.cc b/caffe2/binaries/make_image_db.cc
index 2bdbb53d..7e8a8a34 100644
--- a/caffe2/binaries/make_image_db.cc
+++ b/caffe2/binaries/make_image_db.cc
@@ -127,7 +127,7 @@ class Converter {
// Add raw file contents to DB if !raw
if (!caffe2::FLAGS_raw) {
- std::ifstream image_file_stream(input_folder + pair.first);
+ std::ifstream image_file_stream(input_folder + pair.first, std::ios::in | std::ios::binary);
if (!image_file_stream) {
LOG(ERROR) << "Cannot open " << input_folder << pair.first
<< ". Skipping.";
diff --git a/caffe2/core/blob_serialization.h b/caffe2/core/blob_serialization.h
index 66a1284d..48a36427 100644
--- a/caffe2/core/blob_serialization.h
+++ b/caffe2/core/blob_serialization.h
@@ -584,7 +584,7 @@ void TensorDeserializer<Context>::Deserialize(
for (int i = 0; i < chunkSize; ++i) {
temp_blob.Deserialize(proto.string_data(i));
if (i == 0) {
- raw_ptr = tensor->template raw_mutable_data(temp_blob.meta());
+ raw_ptr = tensor->raw_mutable_data(temp_blob.meta());
}
temp_blob.meta().copy()(
temp_blob.GetRaw(),
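
MSVC rejects the `template` keyword here because, as far as I can tell, raw_mutable_data(const TypeMeta&) is a plain member function rather than a member template, and the disambiguator is only legal in front of an actual template; GCC tolerates the extra keyword. A small sketch of where the keyword does and does not belong (Tensor here is a toy stand-in, not the caffe2 class):

```cpp
struct Tensor {
  // A plain member function: the 'template' disambiguator is NOT allowed
  // in front of this name (MSVC enforces that strictly; GCC was lenient).
  void* raw_mutable_data() { return nullptr; }

  // A member template: dependent callers DO need the disambiguator.
  template <typename T>
  T* mutable_data() { return nullptr; }
};

template <typename TensorType>
void Use(TensorType* tensor) {
  // Plain member on a dependent object: call it directly.
  void* raw = tensor->raw_mutable_data();

  // Member template on a dependent object: without 'template' the '<'
  // would be parsed as a less-than operator.
  float* typed = tensor->template mutable_data<float>();

  (void)raw;
  (void)typed;
}

int main() {
  Tensor t;
  Use(&t);
  return 0;
}
```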
diff --git a/caffe2/core/operator_schema.cc b/caffe2/core/operator_schema.cc
index f6723cb6..77e10caf 100644
--- a/caffe2/core/operator_schema.cc
+++ b/caffe2/core/operator_schema.cc
@@ -248,7 +248,7 @@ OpSchema& OpSchema::ScalarType(::caffe2::TensorProto_DataType dt) {
}
OpSchema& OpSchema::CostInferenceFunction(
- CostInferenceFunctionType&& function) {
+ CostInferenceFunctionType function) {
cost_inference_function_ =
caffe2::make_unique<CostInferenceFunctionType>(function);
return *this;
diff --git a/caffe2/core/operator_schema.h b/caffe2/core/operator_schema.h
index 29c2ca0b..78796e3c 100644
--- a/caffe2/core/operator_schema.h
+++ b/caffe2/core/operator_schema.h
@@ -199,7 +199,7 @@ class OpSchema {
/**
* @brief Register the Cost inference function.
*/
- OpSchema& CostInferenceFunction(CostInferenceFunctionType&& function);
+ OpSchema& CostInferenceFunction(CostInferenceFunctionType function);
bool HasCostInferenceFunction() const {
return !!cost_inference_function_;
}
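
The same signature change is made in operator_schema.cc above. My understanding is that taking the function object by value lets callers pass lvalues as well as rvalues, which is what the Windows build tripped over with the rvalue-reference signature. A hedged sketch, with std::function standing in for CostInferenceFunctionType:

```cpp
#include <functional>
#include <memory>

using Fn = std::function<int(int)>;  // stand-in for CostInferenceFunctionType

// By value: accepts both lvalues and rvalues, at the cost of one copy/move.
void RegisterByValue(Fn fn) {
  auto stored = std::make_unique<Fn>(std::move(fn));
  (void)stored;
}

// By rvalue reference: compiles only when the caller passes an rvalue.
void RegisterByRvalue(Fn&& fn) {
  auto stored = std::make_unique<Fn>(std::move(fn));
  (void)stored;
}

int main() {
  Fn f = [](int x) { return x + 1; };
  RegisterByValue(f);              // OK: the lvalue is copied
  // RegisterByRvalue(f);          // error: cannot bind an lvalue to Fn&&
  RegisterByRvalue(std::move(f));  // OK
  return 0;
}
```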
diff --git a/caffe2/image/image_input_op.h b/caffe2/image/image_input_op.h
index ac295c68..a38aec1d 100644
--- a/caffe2/image/image_input_op.h
+++ b/caffe2/image/image_input_op.h
@@ -256,13 +256,13 @@ ImageInputOp<Context>::ImageInputOp(
// hard-coded PCA eigenvectors and eigenvalues, based on RBG channel order
color_lighting_eigvecs_.push_back(
- std::vector<float>{-144.7125, 183.396, 102.2295});
+ std::vector<float>{-144.7125f, 183.396f, 102.2295f});
color_lighting_eigvecs_.push_back(
- std::vector<float>{-148.104, -1.1475, -207.57});
+ std::vector<float>{-148.104f, -1.1475f, -207.57f});
color_lighting_eigvecs_.push_back(
- std::vector<float>{-148.818, -177.174, 107.1765});
+ std::vector<float>{-148.818f, -177.174f, 107.1765f});
- color_lighting_eigvals_ = std::vector<float>{0.2175, 0.0188, 0.0045};
+ color_lighting_eigvals_ = std::vector<float>{0.2175f, 0.0188f, 0.0045f};
CAFFE_ENFORCE_GT(batch_size_, 0, "Batch size should be nonnegative.");
if (use_caffe_datum_) {
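
The f suffixes are needed because these constants have no exact float representation, and narrowing a double inside a braced initializer list is ill-formed in C++11; MSVC treats it as a hard error, while older GCC only warned. A tiny illustration:

```cpp
#include <vector>

int main() {
  // Ill-formed: -144.7125 has no exact float representation, so the
  // double -> float conversion inside a braced list is narrowing.
  // MSVC rejects it outright; older GCC versions only warned.
  // std::vector<float> bad{-144.7125, 183.396, 102.2295};

  // OK: with the 'f' suffix the literals are floats, nothing narrows.
  std::vector<float> good{-144.7125f, 183.396f, 102.2295f};
  return static_cast<int>(good.size()) - 3;  // 0
}
```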
diff --git a/caffe2/operators/conv_op_cudnn.cc b/caffe2/operators/conv_op_cudnn.cc
index e8996415..775789a8 100644
--- a/caffe2/operators/conv_op_cudnn.cc
+++ b/caffe2/operators/conv_op_cudnn.cc
@@ -149,22 +149,32 @@ class CudnnConvOpBase : public ConvPoolOpBase<CUDAContext> {
switch (order_) {
case StorageOrder::NHWC:
if (size == 4) {
+#if CUDNN_VERSION_MIN(7,0,0)
CUDNN_ENFORCE(cudnnSetTensor4dDescriptorEx(
desc_,
cudnnTypeWrapper<T>::type,
N,
-#if CUDNN_VERSION_MIN(7,0,0)
C,
-#else
- C / group_,
-#endif
H,
W,
H * W * C,
1,
W * C,
C));
- } else {
+#else
+ CUDNN_ENFORCE(cudnnSetTensor4dDescriptorEx(
+ desc_,
+ cudnnTypeWrapper<T>::type,
+ N,
+ C / group_,
+ H,
+ W,
+ H * W * C,
+ 1,
+ W * C,
+ C));
+#endif
+ } else {
#if !CUDNN_VERSION_MIN(7,0,0)
C = C / group_;
#endif
@@ -180,22 +190,32 @@ class CudnnConvOpBase : public ConvPoolOpBase<CUDAContext> {
break;
case StorageOrder::NCHW:
if (size == 4) {
+#if CUDNN_VERSION_MIN(7,0,0)
CUDNN_ENFORCE(cudnnSetTensor4dDescriptorEx(
desc_,
cudnnTypeWrapper<T>::type,
N,
-#if CUDNN_VERSION_MIN(7,0,0)
C,
-#else
- C / group_,
-#endif
H,
W,
C * H * W,
H * W,
W,
1));
- } else {
+#else
+ CUDNN_ENFORCE(cudnnSetTensor4dDescriptorEx(
+ desc_,
+ cudnnTypeWrapper<T>::type,
+ N,
+ C / group_,
+ H,
+ W,
+ C * H * W,
+ H * W,
+ W,
+ 1));
+#endif
+ } else {
#if !CUDNN_VERSION_MIN(7,0,0)
C = C / group_;
#endif
@@ -365,19 +385,26 @@ bool CudnnConvOp::DoRunWithType() {
if (filter_changed) {
cudnn_filter_dims_ = filter.dims();
if (kernel_.size() == 2) {
- CUDNN_ENFORCE(cudnnSetFilter4dDescriptor(
+#if CUDNN_VERSION_MIN(7,0,0)
+ CUDNN_ENFORCE(cudnnSetFilter4dDescriptor(
filter_desc_,
cudnnTypeWrapper<T_W>::type,
GetCudnnTensorFormat(order_),
-#if CUDNN_VERSION_MIN(7,0,0)
M,
-#else
- M / group_,
-#endif
C / group_,
kernel_h(),
kernel_w()));
- } else {
+#else
+ CUDNN_ENFORCE(cudnnSetFilter4dDescriptor(
+ filter_desc_,
+ cudnnTypeWrapper<T_W>::type,
+ GetCudnnTensorFormat(order_),
+ M / group_,
+ C / group_,
+ kernel_h(),
+ kernel_w()));
+#endif
+ } else {
vector<int> dims(filter.dims().begin(), filter.dims().end());
dims[0] /= group_;
#if !CUDNN_VERSION_MIN(7,0,0)
@@ -729,19 +756,26 @@ bool CudnnConvGradientOp::DoRunWithType() {
if (filter_changed) {
cudnn_filter_dims_ = filter.dims();
if (kernel_.size() == 2) {
+#if CUDNN_VERSION_MIN(7,0,0)
CUDNN_ENFORCE(cudnnSetFilter4dDescriptor(
filter_desc_,
cudnnTypeWrapper<T_W>::type,
GetCudnnTensorFormat(order_),
-#if CUDNN_VERSION_MIN(7,0,0)
M,
-#else
- M / group_,
-#endif
C / group_,
kernel_h(),
kernel_w()));
- } else {
+#else
+ CUDNN_ENFORCE(cudnnSetFilter4dDescriptor(
+ filter_desc_,
+ cudnnTypeWrapper<T_W>::type,
+ GetCudnnTensorFormat(order_),
+ M / group_,
+ C / group_,
+ kernel_h(),
+ kernel_w()));
+#endif
+ } else {
vector<int> dims(filter.dims().begin(), filter.dims().end());
#if !CUDNN_VERSION_MIN(7,0,0)
dims[0] /= group_;
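
This restructuring (and the similar one in recurrent_op_cudnn.cc below) works around a preprocessor issue: putting #if/#else lines inside the argument list of a macro call such as CUDNN_ENFORCE(...) is undefined behavior per the standard, and MSVC's preprocessor rejects it, so the whole call is duplicated under each branch instead. A stripped-down illustration with stand-in macros (not the real caffe2 ones):

```cpp
#include <cstdio>

// Stand-ins for the real CUDNN macros; the shape of the problem is the same.
#define ENFORCE(expr) \
  do { if (!(expr)) std::printf("check failed\n"); } while (0)
#define VERSION_MIN_7 1  // pretend we are on cuDNN >= 7

int describe(int C, int group) {
  // NOT portable -- a preprocessor directive inside a macro argument list
  // is undefined behavior, and MSVC refuses to compile it:
  //
  //   ENFORCE(set_descriptor(
  //   #if VERSION_MIN_7
  //       C,
  //   #else
  //       C / group,
  //   #endif
  //       ...));
  //
  // Portable -- duplicate the whole invocation under each branch:
#if VERSION_MIN_7
  ENFORCE(C > 0);
#else
  ENFORCE(C / group > 0);
#endif
  (void)group;
  return C;
}

int main() { return describe(4, 2) == 4 ? 0 : 1; }
```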
diff --git a/caffe2/operators/layer_norm_op.cu b/caffe2/operators/layer_norm_op.cu
index 23ced9c8..84bf0161 100644
--- a/caffe2/operators/layer_norm_op.cu
+++ b/caffe2/operators/layer_norm_op.cu
@@ -277,7 +277,7 @@ bool LayerNormGradientOp<CUDAContext>::DoRunWithType<float>() {
stats_dims.push_back(1);
dmean_.Resize(stats_dims);
dstdev_.Resize(stats_dims);
- gscratch_.Resize(std::vector<size_t>{left, right});
+ gscratch_.Resize(std::vector<TIndex>{left, right});
std::vector<int> segs(left + 1);
std::iota(segs.begin(), segs.end(), 0);
@@ -307,7 +307,7 @@ bool LayerNormGradientOp<CUDAContext>::DoRunWithType<float>() {
dout.data<float>(),
gscratch_.mutable_data<float>());
- dstdev_.Resize(vector<size_t>{left, 1});
+ dstdev_.Resize(vector<TIndex>{left, 1});
// dstdev = reduce(temp1)
allocScratchAndReduce(
gscratch_.data<float>(),
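
TIndex is caffe2's signed 64-bit tensor index type, and I believe MSVC could not match a Resize() call made with a std::vector<size_t>, which is a distinct type from std::vector<TIndex>. A tiny sketch with a stand-in Resize (not the real caffe2 signature):

```cpp
#include <cstdint>
#include <vector>

using TIndex = int64_t;  // caffe2's tensor index type

// Stand-in for Tensor::Resize(const std::vector<TIndex>&).
void Resize(const std::vector<TIndex>& dims) { (void)dims; }

int main() {
  TIndex left = 2, right = 3;

  // std::vector<size_t> is a different type and does not convert to
  // std::vector<TIndex>, so the old calls failed to find an overload:
  // Resize(std::vector<size_t>{2, 3});  // no matching function

  // Construct the element type Resize actually expects.
  Resize(std::vector<TIndex>{left, right});
  return 0;
}
```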
diff --git a/caffe2/operators/recurrent_op_cudnn.cc b/caffe2/operators/recurrent_op_cudnn.cc
index 21b3ed3d..c9a19f11 100644
--- a/caffe2/operators/recurrent_op_cudnn.cc
+++ b/caffe2/operators/recurrent_op_cudnn.cc
@@ -131,10 +131,9 @@ void RecurrentBaseOp<T>::initialize(
// RNN setup
{
- CUDNN_ENFORCE(cudnnSetRNNDescriptor(
#if CUDNN_MAJOR >= 7
+ CUDNN_ENFORCE(cudnnSetRNNDescriptor(
cudnn_wrapper_.inline_cudnn_handle(),
-#endif
rnnDesc_,
hiddenSize,
numLayers,
@@ -142,10 +141,19 @@ void RecurrentBaseOp<T>::initialize(
rnnInput,
rnnDirection,
rnnMode,
-#if CUDNN_MAJOR >= 7
CUDNN_RNN_ALGO_STANDARD, // TODO: verify correctness / efficiency.
-#endif
cudnnTypeWrapper<T>::type));
+#else
+ CUDNN_ENFORCE(cudnnSetRNNDescriptor(
+ rnnDesc_,
+ hiddenSize,
+ numLayers,
+ dropoutDesc_,
+ rnnInput,
+ rnnDirection,
+ rnnMode,
+ cudnnTypeWrapper<T>::type));
+#endif
}
// X setup
{
diff --git a/caffe2/utils/GpuBitonicSort.cuh b/caffe2/utils/GpuBitonicSort.cuh
index f52bb508..668d9d80 100644
--- a/caffe2/utils/GpuBitonicSort.cuh
+++ b/caffe2/utils/GpuBitonicSort.cuh
@@ -28,6 +28,8 @@ __device__ inline void bitonicSwap(K& kA, V& vA,
}
};
+#define INTEGER_IS_POWER_OF_2(v) ((v) && !((v) & ((v) - 1)))
+
template <typename Comparator, typename K, typename V,
int Power2SortSize,
int ThreadsPerBlock>
@@ -39,9 +41,9 @@ __device__ inline void bitonicSort(K* keys,
// Assume the sort is taking place in shared memory
// static_assert(Power2SortSize * (sizeof(K) + sizeof(V)) < 32768,
// "sort data too large (>32768 bytes)");
- static_assert(math::integerIsPowerOf2(Power2SortSize),
+ static_assert(/*math::integerIsPowerOf2*/INTEGER_IS_POWER_OF_2(Power2SortSize),
"sort size must be power of 2");
- static_assert(math::integerIsPowerOf2(ThreadsPerBlock),
+ static_assert(/*math::integerIsPowerOf2*/INTEGER_IS_POWER_OF_2(ThreadsPerBlock),
"threads in block must be power of 2");
// If what we are sorting is too small, then not all threads
@@ -107,7 +109,7 @@ __device__ inline void warpBitonicSort(K* keys,
// Smaller sorts should use a warp shuffle sort
static_assert(Power2SortSize > kWarpSize,
"sort not large enough");
- static_assert(math::integerIsPowerOf2(Power2SortSize),
+ static_assert(/*math::integerIsPowerOf2*/INTEGER_IS_POWER_OF_2(Power2SortSize),
"sort size must be power of 2");
static_assert(Power2SortSize <= kMaxBitonicSortSize,
"sort size <= 4096 only supported");
diff --git a/caffe2/utils/proto_utils.cc b/caffe2/utils/proto_utils.cc
index 65d53676..5d08c3cb 100644
--- a/caffe2/utils/proto_utils.cc
+++ b/caffe2/utils/proto_utils.cc
@@ -183,7 +183,7 @@ bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) {
}
void WriteProtoToBinaryFile(const MessageLite& proto, const char* filename) {
- int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+ int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
CAFFE_ENFORCE_NE(
fd, -1, "File cannot be created: ", filename, " error number: ", errno);
std::unique_ptr<ZeroCopyOutputStream> raw_output(new FileOutputStream(fd));
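
Same text-versus-binary issue as in make_image_db.cc, but for the POSIX-style open(): without O_BINARY the Windows CRT opens the descriptor in text mode and mangles the serialized protobuf bytes. O_BINARY does not exist on Linux, so portable code usually defines it to 0 there; a minimal sketch (the file name is just an example):

```cpp
#include <fcntl.h>
#ifdef _WIN32
#include <io.h>
#else
#include <unistd.h>
#endif

// On POSIX systems there is no text/binary distinction, so O_BINARY is
// not defined; defining it as 0 keeps one open() call portable.
#ifndef O_BINARY
#define O_BINARY 0
#endif

int main() {
  int fd = open("output.pb", O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
  if (fd == -1) return 1;
#ifdef _WIN32
  _close(fd);
#else
  close(fd);
#endif
  return 0;
}
```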
diff --git a/cmake/Cuda.cmake b/cmake/Cuda.cmake
index 2425375e..54605ef1 100644
--- a/cmake/Cuda.cmake
+++ b/cmake/Cuda.cmake
@@ -6,7 +6,7 @@
# Default is set to cuda 9. If we detect the cuda architectores to be less than
# 9, we will lower it to the corresponding known archs.
set(Caffe2_known_gpu_archs "30 35 50 52 60 61 70") # for CUDA 9.x
-set(Caffe2_known_gpu_archs8 "20 21(20) 30 35 50 52 60 61") # for CUDA 8.x
+set(Caffe2_known_gpu_archs8 "20 21(20) 30 35 50 52 60 61 62") # for CUDA 8.x
set(Caffe2_known_gpu_archs7 "20 21(20) 30 35 50 52") # for CUDA 7.x
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index b18288d1..da53e3d4 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -145,7 +145,7 @@ if(USE_LMDB)
find_package(LMDB)
if(LMDB_FOUND)
caffe2_include_directories(${LMDB_INCLUDE_DIR})
- list(APPEND Caffe2_DEPENDENCY_LIBS ${LMDB_LIBRARIES})
+ list(APPEND Caffe2_DEPENDENCY_LIBS "ntdll.lib" ${LMDB_LIBRARIES})
else()
message(WARNING "Not compiling with LMDB. Suppress this warning with -DUSE_LMDB=OFF")
set(USE_LMDB OFF)
@@ -213,6 +213,7 @@ if(USE_OPENCV)
# OpenCV 2
find_package(OpenCV QUIET COMPONENTS core highgui imgproc)
endif()
+ set(OpenCV_FOUND TRUE)
if(OpenCV_FOUND)
caffe2_include_directories(${OpenCV_INCLUDE_DIRS})
list(APPEND Caffe2_DEPENDENCY_LIBS ${OpenCV_LIBS})
@@ -356,9 +357,9 @@ if(USE_NCCL)
if(NOT USE_CUDA)
message(WARNING "If not using cuda, one should not use NCCL either.")
set(USE_NCCL OFF)
- elseif(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
- message(WARNING "NCCL is currently only supported under Linux.")
- set(USE_NCCL OFF)
+ #elseif(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
+ # message(WARNING "NCCL is currently only supported under Linux.")
+ # set(USE_NCCL OFF)
else()
include("cmake/External/nccl.cmake")
caffe2_include_directories(${NCCL_INCLUDE_DIRS})
diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake
index b804d9a0..11d89c09 100644
--- a/cmake/Utils.cmake
+++ b/cmake/Utils.cmake
@@ -245,6 +245,9 @@ function(caffe2_binary_target target_name_or_src)
add_executable(${__target} ${__srcs})
add_dependencies(${__target} ${Caffe2_MAIN_LIBS_ORDER})
target_link_libraries(${__target} ${Caffe2_MAIN_LIBS} ${Caffe2_DEPENDENCY_LIBS})
+ if (USE_CUDA)
+ target_link_libraries(${__target} ${Caffe2_CUDA_DEPENDENCY_LIBS})
+ endif()
install(TARGETS ${__target} DESTINATION bin)
endfunction()
Your post is so badly formatted that I can't figure out what has changed.
(I updated your comment a bit - you can mark it as a code section.)
I assume that the current head passes the Windows build - if not, please let us know for sure :) And feel free to send a pull request.