Skip to content

Commit

Permalink
Merge pull request #662 from FederatedAI/dev-2.5.3
Browse files Browse the repository at this point in the history
Dev 2.5.3
  • Loading branch information
forgivedengkai authored Nov 9, 2023
2 parents 4e6b1ed + d292b16 commit 7fef0e0
Show file tree
Hide file tree
Showing 25 changed files with 402 additions and 249 deletions.
2 changes: 1 addition & 1 deletion BUILD_INFO
Original file line number Diff line number Diff line change
@@ -1 +1 @@
eggroll.version=2.5.2
eggroll.version=2.5.3
3 changes: 3 additions & 0 deletions bin/gpu/nvidia.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Print the number of GPUs whose name reported by nvidia-smi contains "NVIDIA".
# The format option must be written without a space ("csv,noheader"); with a space
# the shell passes "noheader" as a separate argument and the header row is not suppressed.
# grep -c prints the count of matching lines (0 when nothing matches).
result=$(nvidia-smi --query-gpu=name --format=csv,noheader | grep -c 'NVIDIA')
echo "$result"

Empty file added conf/node-extend-env.properties
Empty file.
3 changes: 2 additions & 1 deletion conf/whitelist.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
"FeatureImportance"
],
"federatedml.ensemble.basic_algorithms.decision_tree.tree_core.g_h_optim": [
"SplitInfoPackage"
"SplitInfoPackage",
"SplitInfoPackage2"
],
"federatedml.ensemble.basic_algorithms.decision_tree.tree_core.node": [
"Node"
Expand Down
107 changes: 52 additions & 55 deletions jvm/core/main/java/com/webank/eggroll/core/env/SysInfoLinux.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,7 @@

package com.webank.eggroll.core.env;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.*;
import java.math.BigInteger;
import java.nio.charset.Charset;
import java.util.ArrayList;
Expand All @@ -19,6 +15,8 @@
import com.google.common.annotations.VisibleForTesting;


import com.webank.eggroll.core.constant.ErConfKey;
import com.webank.eggroll.core.constant.NodeManagerConfKeys;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -151,27 +149,62 @@ private static long getConf(String attr) {
}


/**
 * Counts GPUs by running {@code nvidia-smi} through {@code /bin/sh} and counting the
 * output lines that contain the text "NVIDIA".
 *
 * <p>This is a best-effort probe: any exception raised while running the command or
 * reading its output is swallowed, and the count accumulated so far (0 if nothing was
 * parsed) is returned.
 *
 * @return the number of output lines containing "NVIDIA"; 0 when the command produces
 *         no output or fails
 * @throws IOException kept in the signature for existing callers; the visible body
 *         catches every {@link Exception} itself
 */
public int getGpuNumberV2() throws IOException {
    int result = 0;
    try {
        // The format option must not contain a space: "csv, noheader" would be split by the
        // shell into two arguments and "noheader" would no longer be part of --format.
        String[] cmd = new String[]{"/bin/sh", "-c", "nvidia-smi --query-gpu=name --format=csv,noheader"};
        ShellCommandExecutor shellExecutorClk = new ShellCommandExecutor(cmd);
        shellExecutorClk.execute();
        String cmdReturnString = shellExecutorClk.getOutput();
        if (StringUtils.isNotEmpty(cmdReturnString)) {
            // One GPU name per line; only lines mentioning NVIDIA are counted.
            for (String line : cmdReturnString.split("\n")) {
                if (line.contains("NVIDIA")) {
                    result++;
                }
            }
        }
    } catch (Exception ignored) {
        // Deliberately ignored: a failing probe (presumably nvidia-smi missing or erroring)
        // is reported as "no GPUs" rather than as an error.
    }
    return result;
}

public int getGpuNumber() throws IOException {
String gpus = null;
int result = 0;
try{

String[] cmd = new String[] { "/bin/sh", "-c", "nvidia-smi --query-gpu=name --format=csv, noheader" };
ShellCommandExecutor shellExecutorClk = new ShellCommandExecutor(cmd);
// name
// NVIDIA Tesla V100-SXM2-32GB
// NVIDIA Tesla V100-SXM2-32GB
// NVIDIA Tesla V100-SXM2-32GB
// NVIDIA Tesla V100-SXM2-32GB
shellExecutorClk.execute();
String cmdReturnString = shellExecutorClk.getOutput();
if (StringUtils.isNotEmpty(cmdReturnString))
result = cmdReturnString.split("\n").length-1;
}catch(Exception ignore){}
ErConfKey shellConfig = NodeManagerConfKeys.CONFKEY_NODE_MANAGER_GPU_NUM_SHELL();
String shell = shellConfig.get();
String eggrollHome = System.getenv("EGGROLL_HOME");
String path = eggrollHome+"/bin/gpu/"+shell;
File file = new File(path);
if(StringUtils.isNotEmpty(path)&&file.exists()) {
String[] cmd = new String[]{"/bin/sh", "-c", path};
ShellCommandExecutor shellExecutorClk = new ShellCommandExecutor(cmd);
shellExecutorClk.execute();
String cmdReturnString = shellExecutorClk.getOutput();
try {
cmdReturnString=cmdReturnString.replace("\n","");
cmdReturnString=cmdReturnString.replace("\r","");
result = new Integer(cmdReturnString);
} catch (Throwable e) {
e.printStackTrace();
}
System.err.println("get gpu num exec "+path +" return "+cmdReturnString +" result :"+result) ;
}else{
System.err.println("get gpu shell is not set");
}
}catch(Exception ignore){
ignore.printStackTrace();
}
if(result==0){
result = getGpuNumberV2();
}
return result;
}



public int getProcess(int pid) {


Expand Down Expand Up @@ -724,43 +757,7 @@ public long getStorageBytesWritten() {
return numDisksBytesWritten;
}

/**
* Test the {@link SysInfoLinux}.
*
* @param args - arguments to this calculator test
*/
public static void main(String[] args) {
SysInfoLinux plugin = new SysInfoLinux();
// System.out.println("Physical memory Size (bytes) : "
// + plugin.getPhysicalMemorySize());
// System.out.println("Total Virtual memory Size (bytes) : "
// + plugin.getVirtualMemorySize());
// System.out.println("Available Physical memory Size (bytes) : "
// + plugin.getAvailablePhysicalMemorySize());
// System.out.println("Total Available Virtual memory Size (bytes) : "
// + plugin.getAvailableVirtualMemorySize());
// System.out.println("Number of Processors : " + plugin.getNumProcessors());
// System.out.println("CPU frequency (kHz) : " + plugin.getCpuFrequency());
// System.out.println("Cumulative CPU time (ms) : " +
// plugin.getCumulativeCpuTime());
// System.out.println("Total network read (bytes) : "
// + plugin.getNetworkBytesRead());
// System.out.println("Total network written (bytes) : "
// + plugin.getNetworkBytesWritten());
// System.out.println("Total storage read (bytes) : "
// + plugin.getStorageBytesRead());
// System.out.println("Total storage written (bytes) : "
// + plugin.getStorageBytesWritten());
// try {
// // Sleep so we can compute the CPU usage
// Thread.sleep(500L);
// } catch (InterruptedException e) {
// // do nothing
// }
// System.out.println("CPU usage % : " + plugin.getCpuUsagePercentage());

plugin.getProcess(1000);
}


@VisibleForTesting
void setReadCpuInfoFile(boolean readCpuInfoFileValue) {
Expand Down
153 changes: 85 additions & 68 deletions jvm/core/main/java/com/webank/eggroll/core/util/NetUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,15 @@

package com.webank.eggroll.core.util;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.*;
import java.util.Enumeration;
import java.util.Optional;
import java.util.*;
import java.util.concurrent.ThreadLocalRandom;
import java.util.regex.Pattern;

Expand Down Expand Up @@ -52,12 +53,7 @@ public class NetUtils {
private static volatile InetAddress LOCAL_ADDRESS = null;

public static void main(String[] args) {
// System.out.println(NetUtils.getLocalHost());
// System.out.println(NetUtils.getAvailablePort());
// System.out.println(NetUtils.getLocalAddress());
// System.out.println(NetUtils.getLocalIp());
// System.out.println(NetUtils.getIpByHost("127.0.0.1"));
// System.out.println(NetUtils.getLocalAddress0(""));
System.out.println(NetUtils.getLocalHost(""));
}

public static int getRandomPort() {
Expand Down Expand Up @@ -180,17 +176,19 @@ static InetAddress normalizeV6Address(Inet6Address address) {
return address;
}

public static String getLocalHost() {
InetAddress address = getLocalAddress();
return address == null ? LOCALHOST_VALUE : address.getHostAddress();
/**
 * Returns the textual host address of the local machine.
 *
 * @param deviceName value forwarded unchanged to {@link #getLocalAddress(String)}
 * @return the host address of the resolved local address, or {@code LOCALHOST_VALUE}
 *         when no address could be resolved
 */
public static String getLocalHost(String deviceName) {
    InetAddress resolved = getLocalAddress(deviceName);
    if (resolved == null) {
        return LOCALHOST_VALUE;
    }
    return resolved.getHostAddress();
}


public static InetAddress getLocalAddress() {
public static InetAddress getLocalAddress(String deviceName) {
if (LOCAL_ADDRESS != null) {
return LOCAL_ADDRESS;
}
InetAddress localAddress = getLocalAddress0("");
InetAddress localAddress = getLocalAddress0(deviceName);
LOCAL_ADDRESS = localAddress;
return localAddress;
}
Expand All @@ -209,26 +207,26 @@ private static Optional<InetAddress> toValidAddress(InetAddress address) {
}


public static String getLocalIp() {

try {
InetAddress inetAddress = getLocalAddress0("eth0");
if (inetAddress != null) {
return inetAddress.getHostAddress();
} else {
inetAddress = getLocalAddress0("");
}
if (inetAddress != null) {
return inetAddress.getHostAddress();
} else {
throw new RuntimeException("can not get local ip");
}

} catch (Throwable e) {
logger.error(e.getMessage(), e);
}
return "";
}
// public static String getLocalIp() {
//
// try {
// InetAddress inetAddress = getLocalAddress0("eth0");
// if (inetAddress != null) {
// return inetAddress.getHostAddress();
// } else {
// inetAddress = getLocalAddress0("");
// }
// if (inetAddress != null) {
// return inetAddress.getHostAddress();
// } else {
// throw new RuntimeException("can not get local ip");
// }
//
// } catch (Throwable e) {
// logger.error(e.getMessage(), e);
// }
// return "";
// }

private static String getIpByEthNum(String ethNum) {
try {
Expand Down Expand Up @@ -258,62 +256,80 @@ public static String getOsName() {
String osName = System.getProperty("os.name");
return osName;
}
/**
 * Picks the first address of the given interface that passes {@code toValidAddress}
 * and answers {@link InetAddress#isReachable(int)} within 10000 ms.
 *
 * <p>Addresses are tried in the order the interface enumerates them. An
 * {@link IOException} from the reachability probe is ignored silently; any other
 * throwable raised while examining an address is logged at warn level (message only)
 * and the next address is tried.
 *
 * @param network the interface whose addresses are examined
 * @return the first valid, reachable address, or {@code null} if there is none
 */
private static InetAddress chooseAddressFromInterface(NetworkInterface network){
    for (Enumeration<InetAddress> candidates = network.getInetAddresses(); candidates.hasMoreElements(); ) {
        try {
            Optional<InetAddress> valid = toValidAddress(candidates.nextElement());
            if (!valid.isPresent()) {
                continue;
            }
            if (valid.get().isReachable(10000)) {
                return valid.get();
            }
        } catch (IOException e) {
            // ignore
        } catch (Throwable e) {
            logger.warn(e.getMessage());
        }
    }
    return null;
}


private static InetAddress getLocalAddress0(String name) {
InetAddress localAddress = null;
InetAddress other = null;
try {
localAddress = InetAddress.getLocalHost();
Optional<InetAddress> addressOp = toValidAddress(localAddress);
if (addressOp.isPresent()) {
return addressOp.get();
// return addressOp.get();
} else {
localAddress = null;
}
} catch (Throwable e) {
logger.warn(e.getMessage());
e.printStackTrace();
}
// if(StringUtils.isNotEmpty(name)||localAddress.getHostAddress().equals(LOCALHOST_VALUE)) {

try {
Enumeration<NetworkInterface> interfaces = NetworkInterface.getNetworkInterfaces();
if (null == interfaces) {
return localAddress;
}
while (interfaces.hasMoreElements()) {
try {
try {
Enumeration<NetworkInterface> interfaces = NetworkInterface.getNetworkInterfaces();
if (null == interfaces) {
return localAddress;
}
Map<String,NetworkInterface> networkIMap= Maps.newLinkedHashMap();
while (interfaces.hasMoreElements()){
NetworkInterface network = interfaces.nextElement();
if (network.isLoopback() || network.isVirtual() || !network.isUp()) {
continue;
}
if (StringUtils.isNotEmpty(name)) {
if (!network.getName().equals(name)) {
networkIMap.put(network.getName(),network);
}
if (StringUtils.isNotEmpty(name)&&networkIMap.get(name)!=null) {
return chooseAddressFromInterface(networkIMap.get(name));
}

List<String> names = Lists.newArrayList(networkIMap.keySet());
for(Map.Entry<String,NetworkInterface> entry:networkIMap.entrySet()){
try {
if (entry.getValue().isLoopback() || entry.getValue().isVirtual() || !entry.getValue().isUp()) {
continue;
}
}
Enumeration<InetAddress> addresses = network.getInetAddresses();
while (addresses.hasMoreElements()) {
try {
Optional<InetAddress> addressOp = toValidAddress(addresses.nextElement());
if (addressOp.isPresent()) {
try {
if (addressOp.get().isReachable(10000)) {
return addressOp.get();
}
} catch (IOException e) {
// ignore
}
}
} catch (Throwable e) {
logger.warn(e.getMessage());
other= chooseAddressFromInterface(entry.getValue());
if(other!=null){
break;
}
}catch (Throwable e){

}
} catch (Throwable e) {
logger.warn(e.getMessage());
}

} catch (Throwable e) {
logger.warn(e.getMessage());
}
} catch (Throwable e) {
logger.warn(e.getMessage());
}
if(localAddress==null||localAddress.getHostAddress().equals(LOCALHOST_VALUE)&&other!=null){
localAddress=other;
}

return localAddress;
}

Expand Down Expand Up @@ -531,4 +547,5 @@ private static Integer getNumOfIpSegment(String ipSegment, boolean isIpv4) {
return Integer.parseInt(ipSegment, 16);
}


}
Loading

0 comments on commit 7fef0e0

Please sign in to comment.