2023-01-13 11:26:39 +08:00
package service
import (
"context"
2023-05-18 11:01:34 +08:00
"encoding/base64"
2023-01-13 11:26:39 +08:00
"fmt"
"git.hpds.cc/Component/logging"
"hpds-iot-web/config"
"hpds-iot-web/internal/proto"
"hpds-iot-web/model"
2023-05-18 11:01:34 +08:00
"hpds-iot-web/pkg/utils"
2023-05-14 18:23:12 +08:00
"math"
"math/rand"
2023-01-13 11:26:39 +08:00
"net/http"
2023-05-18 11:01:34 +08:00
"path"
2023-01-13 11:26:39 +08:00
"time"
"xorm.io/xorm"
)
type DatasetService interface {
GetOwnerProjectList ( ctx context . Context , req proto . OwnerProjectRequest ) ( rsp * proto . BaseResponse , err error )
DatasetList ( ctx context . Context , req proto . DatasetRequest ) ( rsp * proto . BaseResponse , err error )
ImportDataset ( ctx context . Context , req proto . ImportDatasetRequest ) ( rsp * proto . BaseResponse , err error )
2023-04-24 15:21:17 +08:00
DatasetInfo ( ctx context . Context , req proto . DatasetItemRequest ) ( rsp * proto . BaseResponse , err error )
2023-05-14 18:23:12 +08:00
2023-05-18 11:01:34 +08:00
CreateTrainDataset ( ctx context . Context , req proto . TrainDatasetRequest ) ( rsp * proto . BaseResponse , err error )
TrainDatasetList ( ctx context . Context , req proto . TrainDatasetItemRequest ) ( rsp * proto . BaseResponse , err error )
TrainDatasetFileList ( ctx context . Context , req proto . TrainDatasetItemRequest ) ( rsp * proto . BaseResponse , err error )
2023-01-13 11:26:39 +08:00
}
// NewDatasetService returns a DatasetService backed by a repo that holds the
// given application configuration, xorm engine and logger.
func NewDatasetService(cfg *config.WebConfig, engine *xorm.Engine, logger *logging.Logger) DatasetService {
	svc := new(repo)
	svc.AppConfig = cfg
	svc.engine = engine
	svc.logger = logger
	return svc
}
// GetOwnerProjectList returns every active owner (status = 1) whose name
// matches req.Key, each with its active projects attached. An empty req.Key
// matches all owners.
//
// Project ids in the result are rendered by the database as
// "<owner_id>-<project_id>".
//
// On success the envelope carries HTTP 200 and the owner list in Data. If ctx
// is already done, or any query fails, the envelope carries HTTP 500 and the
// same error is returned to the caller.
func (rp *repo) GetOwnerProjectList(ctx context.Context, req proto.OwnerProjectRequest) (rsp *proto.BaseResponse, err error) {
	rsp = new(proto.BaseResponse)
	select {
	case <-ctx.Done():
		rsp.Code = http.StatusInternalServerError
		rsp.Status = http.StatusText(http.StatusInternalServerError)
		rsp.Message = "超时/取消"
		rsp.Err = ctx.Err()
		return rsp, ctx.Err()
	default:
	}

	// fail fills the envelope for a failed request. Routing every error
	// through it guarantees the error is both recorded and returned; the
	// earlier goto-based flow lost errors held in a shadowed variable.
	fail := func(cause error) (*proto.BaseResponse, error) {
		rsp.Code = http.StatusInternalServerError
		rsp.Status = http.StatusText(http.StatusInternalServerError)
		rsp.Err = cause
		rsp.Message = "失败"
		return rsp, cause
	}

	ownerList := make([]model.Owner, 0)
	err = rp.engine.Where("(? = '' or owner_name like ?)", req.Key, "%"+req.Key+"%").
		And("status = 1").Find(&ownerList)
	if err != nil {
		return fail(err)
	}

	data := make([]proto.OwnerProjectItem, len(ownerList))
	for k, v := range ownerList {
		projectList := make([]proto.ProjectItem, 0)
		// The owner id is formatted with %d into the column expression, so
		// only digits ever reach the SQL text.
		projectIDCol := fmt.Sprintf("concat('%d-', project_id) as project_id", v.OwnerId)
		err = rp.engine.Table("project").Cols(projectIDCol, "project_name").
			Where("owner_id = ?", v.OwnerId).And("status = 1").Find(&projectList)
		if err != nil {
			return fail(err)
		}
		data[k] = proto.OwnerProjectItem{
			OwnerId:     v.OwnerId,
			OwnerName:   fmt.Sprintf("%s[%s]", v.OwnerName, v.ChargeUser),
			ProjectList: projectList,
		}
	}

	rsp.Code = http.StatusOK
	rsp.Status = http.StatusText(http.StatusOK)
	rsp.Message = "成功"
	rsp.Data = data
	rsp.Err = nil
	return rsp, nil
}
func ( rp * repo ) DatasetList ( ctx context . Context , req proto . DatasetRequest ) ( rsp * proto . BaseResponse , err error ) {
rsp = new ( proto . BaseResponse )
select {
case <- ctx . Done ( ) :
err = fmt . Errorf ( "超时/取消" )
rsp . Code = http . StatusInternalServerError
rsp . Status = http . StatusText ( http . StatusInternalServerError )
rsp . Message = "超时/取消"
rsp . Err = ctx . Err ( )
return rsp , ctx . Err ( )
default :
list := make ( [ ] model . Dataset , 0 )
2023-04-24 15:21:17 +08:00
st := rp . engine . Where ( "(? = '' or dataset_name like ?)" , req . DatasetName , "%" + req . DatasetName + "%" ) .
2023-01-13 11:26:39 +08:00
And ( "(? = '' or create_at >= ?)" , req . StartTime , req . StartTime ) .
And ( "(? = '' or create_at < ?)" , req . EndTime , req . EndTime ) .
And ( "(? = 0 or owner_id = ?)" , req . OwnerId , req . OwnerId ) .
2023-04-24 15:21:17 +08:00
And ( "status = 1" )
if len ( req . ProjectId ) > 0 {
st . In ( "project_id" , req . ProjectId )
}
count , err := st . Limit ( int ( req . Size ) , int ( ( ( req . Page ) - 1 ) * req . Size ) ) . FindAndCount ( & list )
2023-01-13 11:26:39 +08:00
if err != nil {
goto ReturnPoint
}
2023-06-17 09:38:26 +08:00
type QuantityStatistics struct {
TotalNumber int64
TotalSize int64
}
2023-01-13 11:26:39 +08:00
data := make ( [ ] proto . DatasetItem , len ( list ) )
for k , v := range list {
2023-03-24 08:42:28 +08:00
detailList := make ( [ ] model . FileManager , 0 )
2023-06-17 09:38:26 +08:00
datasetCount , err := rp . engine . Where ( "dataset_id = ?" , v . DatasetId ) . FindAndCount ( & detailList )
2023-01-13 11:26:39 +08:00
if err != nil {
goto ReturnPoint
}
2023-03-24 08:42:28 +08:00
fm := new ( model . FileManager )
2023-06-17 09:38:26 +08:00
datasetSize , err := rp . engine . Where ( "dataset_id = ?" , v . DatasetId ) . SumInt ( fm , "file_size" )
if err != nil {
goto ReturnPoint
}
qs := new ( QuantityStatistics )
_ , err = rp . engine . SQL ( ` select sum(file_size) total_size, count(file_id) total_number from file_manager where is_disease > 0 ` ) . Get ( qs )
2023-03-24 08:42:28 +08:00
if err != nil {
goto ReturnPoint
}
2023-01-13 11:26:39 +08:00
data [ k ] = proto . DatasetItem {
DatasetId : v . DatasetId ,
DatasetName : v . DatasetName ,
DatasetDesc : v . DatasetDesc ,
2023-03-23 18:03:09 +08:00
StoreName : v . StoreName ,
2023-01-13 11:26:39 +08:00
CategoryId : v . CategoryId ,
ProjectId : v . ProjectId ,
OwnerId : v . OwnerId ,
Creator : v . Creator ,
CreateAt : v . CreateAt ,
DatasetCount : datasetCount ,
2023-03-24 08:42:28 +08:00
DatasetSize : datasetSize ,
2023-06-17 09:38:26 +08:00
LabelCount : qs . TotalNumber ,
LabelSize : qs . TotalSize ,
2023-01-13 11:26:39 +08:00
}
}
rsp . Code = http . StatusOK
rsp . Status = http . StatusText ( http . StatusOK )
rsp . Message = "成功"
rsp = FillPaging ( count , req . Page , req . Size , data , rsp )
rsp . Err = err
return rsp , err
}
ReturnPoint :
if err != nil {
rsp . Code = http . StatusInternalServerError
rsp . Status = http . StatusText ( http . StatusInternalServerError )
rsp . Err = err
rsp . Message = "失败"
}
return rsp , err
}
func ( rp * repo ) ImportDataset ( ctx context . Context , req proto . ImportDatasetRequest ) ( rsp * proto . BaseResponse , err error ) {
rsp = new ( proto . BaseResponse )
select {
case <- ctx . Done ( ) :
err = fmt . Errorf ( "超时/取消" )
rsp . Code = http . StatusInternalServerError
rsp . Status = http . StatusText ( http . StatusInternalServerError )
rsp . Message = "超时/取消"
rsp . Err = ctx . Err ( )
return rsp , ctx . Err ( )
default :
item := & model . Dataset {
DatasetName : req . DatasetName ,
DatasetDesc : req . DatasetDesc ,
CategoryId : req . CategoryId ,
ProjectId : req . ProjectId ,
OwnerId : req . OwnerId ,
2023-03-23 18:03:09 +08:00
StoreName : req . StoreName ,
2023-01-13 11:26:39 +08:00
Creator : req . Creator ,
Status : 1 ,
CreateAt : time . Now ( ) . Unix ( ) ,
UpdateAt : time . Now ( ) . Unix ( ) ,
}
_ , err = rp . engine . Insert ( item )
if err != nil {
goto ReturnPoint
}
rsp . Code = http . StatusOK
rsp . Status = http . StatusText ( http . StatusOK )
rsp . Message = "成功"
rsp . Data = item
rsp . Err = err
return rsp , err
}
ReturnPoint :
if err != nil {
rsp . Code = http . StatusInternalServerError
rsp . Status = http . StatusText ( http . StatusInternalServerError )
rsp . Err = err
rsp . Message = "失败"
}
return rsp , err
}
2023-04-24 15:21:17 +08:00
// DatasetInfo loads the dataset whose primary key is req.DatasetId.
//
// On success the envelope carries HTTP 200 and the dataset in Data. If ctx is
// already done, the lookup fails, or no such dataset exists, the envelope
// carries HTTP 500 and the same error is returned.
func (rp *repo) DatasetInfo(ctx context.Context, req proto.DatasetItemRequest) (rsp *proto.BaseResponse, err error) {
	rsp = new(proto.BaseResponse)

	// Refuse to start work for a request that is already cancelled or timed out.
	select {
	case <-ctx.Done():
		cause := ctx.Err()
		rsp.Code = http.StatusInternalServerError
		rsp.Status = http.StatusText(http.StatusInternalServerError)
		rsp.Message = "超时/取消"
		rsp.Err = cause
		return rsp, cause
	default:
	}

	var found bool
	dataset := new(model.Dataset)
	found, err = rp.engine.ID(req.DatasetId).Get(dataset)
	// A missing row is reported the same way as a query failure.
	if err == nil && !found {
		err = fmt.Errorf("未能找到对应的数据集")
	}
	if err != nil {
		rsp.Code = http.StatusInternalServerError
		rsp.Status = http.StatusText(http.StatusInternalServerError)
		rsp.Err = err
		rsp.Message = "失败"
		return rsp, err
	}

	rsp.Code = http.StatusOK
	rsp.Status = http.StatusText(http.StatusOK)
	rsp.Message = "成功"
	rsp.Err = err
	rsp.Data = dataset
	return rsp, err
}
2023-05-14 18:23:12 +08:00
2023-05-18 11:01:34 +08:00
func ( rp * repo ) CreateTrainDataset ( ctx context . Context , req proto . TrainDatasetRequest ) ( rsp * proto . BaseResponse , err error ) {
2023-05-14 18:23:12 +08:00
rsp = new ( proto . BaseResponse )
select {
case <- ctx . Done ( ) :
err = fmt . Errorf ( "超时/取消" )
rsp . Code = http . StatusInternalServerError
rsp . Status = http . StatusText ( http . StatusInternalServerError )
rsp . Message = "超时/取消"
rsp . Err = ctx . Err ( )
return rsp , ctx . Err ( )
default :
var (
h bool
trainFileList [ ] model . FileManager
valFileList [ ] model . FileManager
testFileList [ ] model . FileManager
//wg sync.WaitGroup
)
log := new ( model . DatasetOperationLog )
dataset := new ( model . Dataset )
h , err = rp . engine . ID ( req . DatasetId ) . Get ( dataset )
if err != nil {
goto ReturnPoint
}
if ! h {
err = fmt . Errorf ( "未能找到对应的采集数据集" )
goto ReturnPoint
}
fileList := make ( [ ] model . FileManager , 0 )
2023-05-18 11:01:34 +08:00
err = rp . engine . Where ( "dataset_id = ?" , req . DatasetId ) . And ( "is_disease > 0" ) . Find ( & fileList )
2023-05-14 18:23:12 +08:00
if err != nil {
goto ReturnPoint
}
if req . TargetData == 0 {
req . TargetData = len ( fileList )
}
if req . TargetData > len ( fileList ) {
2023-05-18 11:01:34 +08:00
err = fmt . Errorf ( "超出现有标注数据集数量" )
2023-05-14 18:23:12 +08:00
goto ReturnPoint
}
if req . SplitMethod == 1 { //随机
rand . Seed ( time . Now ( ) . UnixNano ( ) )
rand . Shuffle ( len ( fileList ) , func ( i , j int ) {
fileList [ i ] , fileList [ j ] = fileList [ j ] , fileList [ i ]
} )
}
trainNumber := int ( math . Floor ( float64 ( int64 ( req . TargetData ) * req . TrainNumber ) / 100 ) )
valNumber := int ( math . Floor ( float64 ( int64 ( req . TargetData ) * req . ValidationNumber ) / 100 ) )
testNumber := req . TargetData - trainNumber - valNumber
2023-05-18 11:01:34 +08:00
if trainNumber - 1 > 1 {
trainFileList = fileList [ : trainNumber - 1 ]
} else {
trainFileList = make ( [ ] model . FileManager , 0 )
trainFileList = append ( trainFileList , fileList [ 0 ] )
}
if trainNumber != trainNumber + valNumber - 1 {
valFileList = fileList [ trainNumber : trainNumber + valNumber - 1 ]
} else {
valFileList = make ( [ ] model . FileManager , 0 )
valFileList = append ( valFileList , fileList [ trainNumber ] )
}
if trainNumber + valNumber < len ( fileList ) {
testFileList = fileList [ trainNumber + valNumber : ]
} else {
testFileList = make ( [ ] model . FileManager , 0 )
testFileList = append ( testFileList , fileList [ trainNumber + valNumber ] )
}
2023-05-14 18:23:12 +08:00
train := new ( model . TrainingDataset )
h , err = rp . engine . Where ( "name = ?" , req . TrainName ) . Get ( train )
if err != nil {
goto ReturnPoint
}
if ! h {
train . Name = req . TrainName
train . DatasetDesc = req . TrainDesc
2023-05-18 11:01:34 +08:00
//train.DatasetId = req.DatasetId
2023-05-14 18:23:12 +08:00
train . CategoryId = dataset . CategoryId
_ , err = rp . engine . Insert ( train )
if err != nil {
goto ReturnPoint
}
}
log . TargetData = int64 ( req . TargetData )
log . DatasetId = req . DatasetId
log . TrainingDatasetId = train . DatasetId
log . SplitMethod = req . SplitMethod
log . TrainNumber = int64 ( trainNumber )
log . ValidationNumber = int64 ( valNumber )
log . TestNumber = int64 ( testNumber )
log . Creator = req . UserId
_ , err = rp . engine . Insert ( log )
if err != nil {
goto ReturnPoint
}
//wg.Add(3)
2023-05-18 11:01:34 +08:00
go BatchCopyData ( trainFileList , train . DatasetId , log . LogId , req . UserId , 1 , req . TrainName , rp ) //, &wg
go BatchCopyData ( valFileList , train . DatasetId , log . LogId , req . UserId , 2 , req . TrainName , rp ) //, &wg
go BatchCopyData ( testFileList , train . DatasetId , log . LogId , req . UserId , 3 , req . TrainName , rp ) //, &wg
2023-05-14 18:23:12 +08:00
//wg.Wait()
rsp . Code = http . StatusOK
rsp . Status = http . StatusText ( http . StatusOK )
rsp . Message = "成功"
rsp . Err = err
rsp . Data = log
return rsp , err
}
ReturnPoint :
if err != nil {
rsp . Code = http . StatusInternalServerError
rsp . Status = http . StatusText ( http . StatusInternalServerError )
rsp . Err = err
rsp . Message = "失败"
}
return rsp , err
}
2023-05-18 11:01:34 +08:00
func BatchCopyData ( list [ ] model . FileManager , trainId , logId , userId int64 , categoryId int , trainName string , rp * repo ) { //, wg *sync.WaitGroup
2023-05-14 18:23:12 +08:00
batchList := make ( [ ] model . TrainingDatasetDetail , len ( list ) )
for k , v := range list {
2023-05-18 11:01:34 +08:00
dir := "no_disease"
if v . IsDisease == 1 {
dir = "disease"
}
utils . DownloadMinioFileToLocalPath ( v . AccessUrl , path . Join ( rp . AppConfig . TrainDir , trainName , model . GetTrainCategory ( categoryId ) , dir ) , v . FileName ,
rp . AppConfig . Minio . Protocol , rp . AppConfig . Minio . Endpoint , rp . AppConfig . Minio . Bucket , rp . AppConfig . Minio . AccessKeyId ,
rp . AppConfig . Minio . SecretAccessKey , rp . logger )
2023-05-14 18:23:12 +08:00
item := model . TrainingDatasetDetail {
FileName : v . FileName ,
2023-05-18 11:01:34 +08:00
FilePath : path . Join ( rp . AppConfig . TrainDir , trainName , model . GetTrainCategory ( categoryId ) , dir , v . FileName ) ,
2023-05-14 18:23:12 +08:00
DatasetId : trainId ,
CategoryId : categoryId ,
FileSize : v . FileSize ,
FileMd5 : v . FileMd5 ,
2023-05-18 11:01:34 +08:00
IsDisease : v . IsDisease ,
2023-05-14 18:23:12 +08:00
OperationLogId : logId ,
Creator : userId ,
CreateAt : time . Now ( ) . Unix ( ) ,
UpdateAt : time . Now ( ) . Unix ( ) ,
}
batchList [ k ] = item
}
2023-05-18 11:01:34 +08:00
_ , _ = rp . engine . Insert ( batchList )
2023-05-14 18:23:12 +08:00
//wg.Done()
}
2023-05-18 11:01:34 +08:00
// TrainDatasetList returns one page of training datasets filtered by dataset
// id, category (req.BizType) and name. Zero or empty filter values disable
// the corresponding filter.
//
// For every dataset the detail rows are counted per category: category 1
// feeds TrainSize, 2 feeds ValSize, 3 feeds TestSize, and all categories add
// up to TotalSize.
//
// On success the envelope carries HTTP 200 and is completed by FillPaging. If
// ctx is already done, or any query fails, the envelope carries HTTP 500 and
// the same error is returned.
func (rp *repo) TrainDatasetList(ctx context.Context, req proto.TrainDatasetItemRequest) (rsp *proto.BaseResponse, err error) {
	rsp = new(proto.BaseResponse)

	// Refuse to start work for a request that is already cancelled or timed out.
	select {
	case <-ctx.Done():
		cause := ctx.Err()
		rsp.Code = http.StatusInternalServerError
		rsp.Status = http.StatusText(http.StatusInternalServerError)
		rsp.Message = "超时/取消"
		rsp.Err = cause
		return rsp, cause
	default:
	}

	// QuantityStatistics receives one row of the per-category count query.
	type QuantityStatistics struct {
		DatasetId  int64
		CategoryId int
		Total      int64
	}

	// failed fills the envelope for a failed request and returns the error.
	failed := func(cause error) (*proto.BaseResponse, error) {
		rsp.Code = http.StatusInternalServerError
		rsp.Status = http.StatusText(http.StatusInternalServerError)
		rsp.Err = cause
		rsp.Message = "失败"
		return rsp, cause
	}

	var total int64
	rows := make([]model.TrainingDataset, 0)
	query := rp.engine.Where("(?=0 or dataset_id = ?)", req.DatasetId, req.DatasetId).
		And("(?= 0 or category_id = ?)", req.BizType, req.BizType).
		And("(? ='' or name like ?)", req.TrainName, "%"+req.TrainName+"%")
	offset := int((req.Page - 1) * req.Size)
	total, err = query.Limit(int(req.Size), offset).FindAndCount(&rows)
	if err != nil {
		return failed(err)
	}

	result := make([]proto.TrainingDataset, len(rows))
	for idx := range rows {
		row := rows[idx]

		stats := make([]QuantityStatistics, 0)
		err = rp.engine.SQL("select dataset_id, category_id, count(1) as total from training_dataset_detail where dataset_id = ? group by category_id, dataset_id", row.DatasetId).Find(&stats)
		if err != nil {
			return failed(err)
		}

		// The size fields start at their zero value and are filled below.
		entry := proto.TrainingDataset{
			DatasetId:   row.DatasetId,
			Name:        row.Name,
			CategoryId:  row.CategoryId,
			DatasetDesc: row.DatasetDesc,
			StoreName:   row.StoreName,
			CreateAt:    row.CreateAt,
			UpdateAt:    row.UpdateAt,
		}
		for _, stat := range stats {
			switch stat.CategoryId {
			case 1:
				entry.TrainSize = stat.Total
			case 2:
				entry.ValSize = stat.Total
			case 3:
				entry.TestSize = stat.Total
			}
			entry.TotalSize += stat.Total
		}
		result[idx] = entry
	}

	rsp.Code = http.StatusOK
	rsp.Status = http.StatusText(http.StatusOK)
	rsp.Message = "成功"
	rsp = FillPaging(total, req.Page, req.Size, result, rsp)
	rsp.Err = err
	return rsp, err
}
// TrainDatasetFileList returns one page of the files recorded for the
// training dataset req.DatasetId.
//
// Each file is read from its stored FilePath, passed through
// utils.BuffToImage and utils.ImageToBuff with the "jpeg" format, and
// embedded in FileContent as a "data:image/jpeg;base64," URI.
//
// On success the envelope carries HTTP 200 and is completed by FillPaging. If
// ctx is already done, or the query fails, the envelope carries HTTP 500 and
// the same error is returned.
func (rp *repo) TrainDatasetFileList(ctx context.Context, req proto.TrainDatasetItemRequest) (rsp *proto.BaseResponse, err error) {
	rsp = new(proto.BaseResponse)

	// Refuse to start work for a request that is already cancelled or timed out.
	select {
	case <-ctx.Done():
		cause := ctx.Err()
		rsp.Code = http.StatusInternalServerError
		rsp.Status = http.StatusText(http.StatusInternalServerError)
		rsp.Message = "超时/取消"
		rsp.Err = cause
		return rsp, cause
	default:
	}

	var total int64
	details := make([]model.TrainingDatasetDetail, 0)
	offset := int((req.Page - 1) * req.Size)
	total, err = rp.engine.Where("dataset_id = ?", req.DatasetId).
		Limit(int(req.Size), offset).FindAndCount(&details)
	if err != nil {
		rsp.Code = http.StatusInternalServerError
		rsp.Status = http.StatusText(http.StatusInternalServerError)
		rsp.Err = err
		rsp.Message = "失败"
		return rsp, err
	}

	items := make([]proto.TrainingDatasetFileItem, len(details))
	for idx := range details {
		detail := details[idx]

		// NOTE(review): the utils helpers return no error here — confirm how
		// a missing or undecodable file is handled inside them.
		raw := utils.ReadFile(detail.FilePath)
		decoded := utils.BuffToImage(raw)
		jpegBuf := utils.ImageToBuff(decoded, "jpeg")
		encoded := base64.StdEncoding.EncodeToString(jpegBuf.Bytes())

		items[idx] = proto.TrainingDatasetFileItem{
			DetailId:    detail.DetailId,
			FileName:    detail.FileName,
			FileSize:    detail.FileSize,
			FilePath:    detail.FilePath,
			FileContent: "data:image/jpeg;base64," + encoded,
			IsDisease:   detail.IsDisease,
			CategoryId:  detail.CategoryId,
		}
	}

	rsp.Code = http.StatusOK
	rsp.Status = http.StatusText(http.StatusOK)
	rsp.Message = "成功"
	rsp = FillPaging(total, req.Page, req.Size, items, rsp)
	rsp.Err = err
	return rsp, err
}