Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
H
hidra
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
steinbac
hidra
Commits
cf3bfbee
Commit
cf3bfbee
authored
9 years ago
by
Manuela Kuhn
Browse files
Options
Downloads
Patches
Plain Diff
Added fileMover and receiver from Marco
parent
25299638
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
ZeroMQTunnel/fileMover.py
+892
-0
892 additions, 0 deletions
ZeroMQTunnel/fileMover.py
ZeroMQTunnel/receiver.py
+416
-0
416 additions, 0 deletions
ZeroMQTunnel/receiver.py
with
1308 additions
and
0 deletions
ZeroMQTunnel/fileMover.py
0 → 100644
+
892
−
0
View file @
cf3bfbee
from
__builtin__
import
open
,
type
__author__
=
'
Marco Strutz <marco.strutz@desy.de>
'
import
time
import
argparse
import
zmq
import
os
import
logging
import
sys
import
json
import
traceback
from
multiprocessing
import
Process
,
freeze_support
DEFAULT_CHUNK_SIZE
=
1048576
#
# -------------------------- class: WorkerProcess --------------------------------------
#
class
WorkerProcess
():
id
=
None
dataStreamIp
=
None
dataStreamPort
=
None
logfileFullPath
=
None
zmqContextForWorker
=
None
zmqMessageChunkSize
=
None
zmqCleanerIp
=
None
# responsable to delete files
zmqCleanerPort
=
None
# responsable to delete files
fileWaitTime_inMs
=
None
fileMaxWaitTime_InMs
=
None
def
__init__
(
self
,
id
,
dataStreamIp
,
dataStreamPort
,
logfileFullPath
,
chunkSize
,
zmqCleanerIp
,
zmqCleanerPort
,
fileWaitTimeInMs
=
2000.0
,
fileMaxWaitTimeInMs
=
10000.0
):
self
.
id
=
id
self
.
dataStreamIp
=
dataStreamIp
self
.
dataStreamPort
=
dataStreamPort
self
.
logfileFullPath
=
logfileFullPath
self
.
zmqMessageChunkSize
=
chunkSize
self
.
zmqCleanerIp
=
zmqCleanerIp
self
.
zmqCleanerPort
=
zmqCleanerPort
self
.
fileWaitTime_inMs
=
fileWaitTimeInMs
self
.
fileMaxWaitTime_InMs
=
fileMaxWaitTimeInMs
self
.
initLogging
(
logfileFullPath
)
try
:
self
.
process
()
except
KeyboardInterrupt
:
# trace = traceback.format_exc()
logging
.
debug
(
"
KeyboardInterrupt detected. Shutting down workerProcess.
"
)
self
.
zmqContextForWorker
.
destroy
()
else
:
trace
=
traceback
.
format_exc
()
logging
.
error
(
"
Stopping workerProcess due to unknown error condition.
"
)
logging
.
debug
(
"
Error was:
"
+
str
(
trace
))
def
process
(
self
):
"""
sends a
'
ready
'
to a broker and receives a
'
job
'
to process.
The
'
job
'
will be to pass the file of an fileEvent to the
dataPipe.
Why?
-> the simulated
"
onClosed
"
event waits for a file for being
not modified within a certain period of time.
Instead of processing file after file the work will be
spreaded to many workerThreads. So each thread can wait
individual periods of time for a file without blocking
new file events - as new file events will be handled by
another workerThread.
"""
"""
takes the fileEventMessage, reading and passing the new file to
a separate data-messagePipe. Afterwards the original file
will be removed.
"""
id
=
self
.
id
dataStreamIp
=
self
.
dataStreamIp
dataStreamPort
=
self
.
dataStreamPort
logging
.
debug
(
"
new workerThread started. id=
"
+
str
(
id
))
#initialize router
zmqContextForWorker
=
zmq
.
Context
()
self
.
zmqContextForWorker
=
zmqContextForWorker
zmqDataStreamSocket
=
zmqContextForWorker
.
socket
(
zmq
.
PUSH
)
connectionStrDataStreamSocket
=
"
tcp://{ip}:{port}
"
.
format
(
ip
=
dataStreamIp
,
port
=
dataStreamPort
)
zmqDataStreamSocket
.
connect
(
connectionStrDataStreamSocket
)
routerSocket
=
zmqContextForWorker
.
socket
(
zmq
.
REQ
)
routerSocket
.
identity
=
u
"
worker-{ID}
"
.
format
(
ID
=
id
).
encode
(
"
ascii
"
)
connectionStrRouterSocket
=
"
tcp://{ip}:{port}
"
.
format
(
ip
=
"
127.0.0.1
"
,
port
=
"
50000
"
)
routerSocket
.
connect
(
connectionStrRouterSocket
)
processingJobs
=
True
jobCount
=
0
#init Cleaner message-pipe
cleanerSocket
=
zmqContextForWorker
.
socket
(
zmq
.
PUSH
)
connectionStringCleanerSocket
=
"
tcp://{ip}:{port}
"
.
format
(
ip
=
self
.
zmqCleanerIp
,
port
=
self
.
zmqCleanerPort
)
cleanerSocket
.
connect
(
connectionStringCleanerSocket
)
while
processingJobs
:
#sending a "ready"-signal to the router.
#the reply will contain the actual job/task.
logging
.
debug
(
"
worker-
"
+
str
(
id
)
+
"
: sending ready signal
"
)
routerSocket
.
send
(
b
"
READY
"
)
# Get workload from router, until finished
logging
.
debug
(
"
worker-
"
+
str
(
id
)
+
"
: waiting for new job
"
)
workload
=
routerSocket
.
recv
()
logging
.
debug
(
"
worker-
"
+
str
(
id
)
+
"
: new job received
"
)
finished
=
workload
==
b
"
END
"
if
finished
:
processingJobs
=
False
logging
.
debug
(
"
router requested to shutdown worker-thread. Worker processed: %d files
"
%
jobCount
)
break
jobCount
+=
1
#convert fileEventMessage back to a dictionary
fileEventMessageDict
=
None
try
:
fileEventMessageDict
=
json
.
loads
(
str
(
workload
))
logging
.
debug
(
"
str(messageDict) =
"
+
str
(
fileEventMessageDict
)
+
"
type(messageDict) =
"
+
str
(
type
(
fileEventMessageDict
)))
except
Exception
,
e
:
errorMessage
=
"
Unable to convert message into a dictionary.
"
logging
.
error
(
errorMessage
)
logging
.
debug
(
"
Error was:
"
+
str
(
e
))
#extract fileEvent metadata
try
:
#TODO validate fileEventMessageDict dict
filename
=
fileEventMessageDict
[
"
filename
"
]
sourcePath
=
fileEventMessageDict
[
"
sourcePath
"
]
relativeParent
=
fileEventMessageDict
[
"
relativeParent
"
]
except
Exception
,
e
:
errorMessage
=
"
Invalid fileEvent message received.
"
logging
.
error
(
errorMessage
)
logging
.
debug
(
"
Error was:
"
+
str
(
e
))
logging
.
debug
(
"
fileEventMessageDict=
"
+
str
(
fileEventMessageDict
))
#skip all further instructions and continue with next iteration
continue
#passing file to data-messagPipe
try
:
logging
.
debug
(
"
worker-
"
+
str
(
id
)
+
"
: passing new file to data-messagePipe...
"
)
self
.
passFileToDataStream
(
zmqDataStreamSocket
,
filename
,
sourcePath
,
relativeParent
)
logging
.
debug
(
"
worker-
"
+
str
(
id
)
+
"
: passing new file to data-messagePipe...success.
"
)
except
Exception
,
e
:
errorMessage
=
"
Unable to pass new file to data-messagePipe.
"
logging
.
error
(
errorMessage
)
logging
.
error
(
"
Error was:
"
+
str
(
e
))
logging
.
debug
(
"
worker-
"
+
str
(
id
)
+
"
: passing new file to data-messagePipe...failed.
"
)
#skip all further instructions and continue with next iteration
continue
#send remove-request to message pipe
try
:
#sending to pipe
logging
.
debug
(
"
send file-event to cleaner-pipe...
"
)
cleanerSocket
.
send
(
workload
)
logging
.
debug
(
"
send file-event to cleaner-pipe...success.
"
)
#TODO: remember workload. append to list?
# can be used to verify files which have been processed twice or more
except
Exception
,
e
:
errorMessage
=
"
Unable to notify Cleaner-pipe to delete file:
"
+
str
(
filename
)
logging
.
error
(
errorMessage
)
logging
.
debug
(
"
fileEventMessageDict=
"
+
str
(
fileEventMessageDict
))
def
getFileWaitTimeInMs
(
self
):
waitTime
=
2000.0
return
waitTime
def
getFileMaxWaitTimeInMs
(
self
):
maxWaitTime
=
10000.0
return
maxWaitTime
def
passFileToDataStream
(
self
,
zmqDataStreamSocket
,
filename
,
sourcePath
,
relativeParent
):
"""
filesizeRequested == filesize submitted by file-event. In theory it can differ to real file size
"""
# filename = "img.tiff"
# filepath = "C:\dir"
#
# --> sourceFilePathFull = 'C:\\dir\img.tiff'
sourceFilePathFull
=
os
.
path
.
join
(
sourcePath
,
filename
)
#reading source file into memory
try
:
#wait x seconds if file was modified within past y seconds
fileWaitTimeInMs
=
self
.
getFileWaitTimeInMs
()
fileMaxWaitTimeInMs
=
self
.
getFileMaxWaitTimeInMs
()
fileIsStillInUse
=
True
#true == still being written to file by a process
timeStartWaiting
=
time
.
time
()
while
fileIsStillInUse
:
#skip waiting periode if waiting to long for file to get closed
if
time
.
time
()
-
timeStartWaiting
>=
(
fileMaxWaitTimeInMs
/
1000
):
logging
.
debug
(
"
waited to long for file getting closed. aborting
"
)
break
#wait for other process to finish file access
#grabs time when file was modified last
statInfo
=
os
.
stat
(
sourceFilePathFull
)
fileLastModified
=
statInfo
.
st_mtime
logging
.
debug
(
"'"
+
str
(
sourceFilePathFull
)
+
"'
modified last:
"
+
str
(
fileLastModified
))
timeNow
=
time
.
time
()
timeDiff
=
timeNow
-
fileLastModified
logging
.
debug
(
"
timeNow=
"
+
str
(
timeNow
)
+
"
timeDiff=
"
+
str
(
timeDiff
))
waitTimeInSeconds
=
fileWaitTimeInMs
/
1000
if
timeDiff
>=
waitTimeInSeconds
:
fileIsStillInUse
=
False
logging
.
debug
(
"
File was not modified within past
"
+
str
(
fileWaitTimeInMs
)
+
"
ms.
"
)
else
:
logging
.
debug
(
"
still waiting for file to get closed...
"
)
time
.
sleep
(
fileWaitTimeInMs
/
1000
)
#for quick testing set filesize of file as chunksize
logging
.
debug
(
"
get filesize for
'"
+
str
(
sourceFilePathFull
)
+
"'
...
"
)
filesize
=
os
.
path
.
getsize
(
sourceFilePathFull
)
chunksize
=
filesize
#can be used later on to split multipart message
logging
.
debug
(
"
filesize(%s) = %s
"
%
(
sourceFilePathFull
,
str
(
filesize
)))
except
Exception
,
e
:
errorMessage
=
"
Unable to get file metadata for
'"
+
str
(
sourceFilePathFull
)
+
"'
.
"
logging
.
error
(
errorMessage
)
logging
.
debug
(
"
Error was:
"
+
str
(
e
))
raise
Exception
(
e
)
try
:
logging
.
debug
(
"
opening
'"
+
str
(
sourceFilePathFull
)
+
"'
...
"
)
fileDescriptor
=
open
(
str
(
sourceFilePathFull
),
"
rb
"
)
except
Exception
,
e
:
errorMessage
=
"
Unable to read source file
'"
+
str
(
sourceFilePathFull
)
+
"'
.
"
logging
.
error
(
errorMessage
)
logging
.
debug
(
"
Error was:
"
+
str
(
e
))
raise
Exception
(
e
)
#build payload for message-pipe by putting source-file into a message
try
:
payloadMetadata
=
self
.
buildPayloadMetadata
(
filename
,
filesize
,
sourcePath
,
relativeParent
)
except
Exception
,
e
:
errorMessage
=
"
Unable to assemble multi-part message.
"
logging
.
error
(
errorMessage
)
logging
.
debug
(
"
Error was:
"
+
str
(
e
))
raise
Exception
(
e
)
#send message
try
:
logging
.
debug
(
"
Passing multipart-message...
"
)
chunkNumber
=
0
stillChunksToRead
=
True
while
stillChunksToRead
:
chunkNumber
+=
1
#read next chunk from file
fileContentAsByteObject
=
fileDescriptor
.
read
(
self
.
getChunkSize
())
#detect if end of file has been reached
if
not
fileContentAsByteObject
:
stillChunksToRead
=
False
#as chunk is empty decrease chunck-counter
chunkNumber
-=
1
#assemble metadata for zmq-message
chunkPayloadMetadata
=
payloadMetadata
.
copy
()
chunkPayloadMetadata
[
"
chunkNumber
"
]
=
chunkNumber
chunkPayloadMetadataJson
=
json
.
dumps
(
chunkPayloadMetadata
)
chunkPayload
=
[]
chunkPayload
.
append
(
chunkPayloadMetadataJson
)
chunkPayload
.
append
(
fileContentAsByteObject
)
#send to zmq pipe
zmqDataStreamSocket
.
send_multipart
(
chunkPayload
)
#close file
fileDescriptor
.
close
()
# zmqDataStreamSocket.send_multipart(multipartMessage)
logging
.
debug
(
"
Passing multipart-message...done.
"
)
except
Exception
,
e
:
logging
.
error
(
"
Unable to send multipart-message
"
)
logging
.
debug
(
"
Error was:
"
+
str
(
e
))
logging
.
info
(
"
Passing multipart-message...failed.
"
)
raise
Exception
(
e
)
def
appendFileChunksToPayload
(
self
,
payload
,
sourceFilePathFull
,
fileDescriptor
,
chunkSize
):
try
:
# chunksize = 16777216 #16MB
logging
.
debug
(
"
reading file
'"
+
str
(
sourceFilePathFull
)
+
"'
to memory
"
)
# FIXME: chunk is read-out as str. why not as bin? will probably add to much overhead to zmq-message
fileContentAsByteObject
=
fileDescriptor
.
read
(
chunkSize
)
while
fileContentAsByteObject
!=
""
:
payload
.
append
(
fileContentAsByteObject
)
fileContentAsByteObject
=
fileDescriptor
.
read
(
chunkSize
)
except
Exception
,
e
:
raise
Exception
(
str
(
e
))
def
buildPayloadMetadata
(
self
,
filename
,
filesize
,
sourcePath
,
relativeParent
):
"""
builds metadata for zmq-multipart-message. should be used as first element for payload.
:param filename:
:param filesize:
:param sourcePath:
:param relativeParent:
:return:
"""
#add metadata to multipart
logging
.
debug
(
"
create metadata for source file...
"
)
metadataDict
=
{
"
filename
"
:
filename
,
"
filesize
"
:
filesize
,
"
sourcePath
"
:
sourcePath
,
"
relativeParent
"
:
relativeParent
,
"
chunkSize
"
:
self
.
getChunkSize
()}
logging
.
debug
(
"
metadataDict =
"
+
str
(
metadataDict
))
return
metadataDict
def
getChunkSize
(
self
):
return
self
.
zmqMessageChunkSize
def
showFilesystemStatistics
(
self
,
vfsPath
):
statvfs
=
os
.
statvfs
(
vfsPath
)
totalSize
=
statvfs
.
f_frsize
*
statvfs
.
f_blocks
freeBytes
=
statvfs
.
f_frsize
*
statvfs
.
f_bfree
freeSpaceAvailableForUser
=
statvfs
.
f_frsize
*
statvfs
.
f_bavail
#in bytes
freeSpaceAvailableForUser_gigabytes
=
freeSpaceAvailableForUser
/
1024
/
1024
/
1024
freeUserSpaceLeft_percent
=
(
float
(
freeBytes
)
/
float
(
totalSize
)
)
*
100
# print "{number:.{digits}f}".format(number=freeUserSpaceLeft_percent, digits=0)
# print int(freeUserSpaceLeft_percent)
logging
.
debug
(
"
vfsstat: freeSpaceAvailableForUser=
"
+
str
(
freeSpaceAvailableForUser_gigabytes
)
+
"
Gigabytes
"
+
"
(
"
+
str
(
int
(
freeUserSpaceLeft_percent
))
+
"
% free disk space left)
"
)
#warn if disk space is running low
highWaterMark
=
85
if
int
(
freeUserSpaceLeft_percent
)
>=
int
(
highWaterMark
):
logging
.
warning
(
"
Running low in disk space!
"
+
str
(
int
(
freeUserSpaceLeft_percent
))
+
"
% free disk space left.
"
)
def
initLogging
(
self
,
filenameFullPath
):
#@see https://docs.python.org/2/howto/logging-cookbook.html
#log everything to file
logging
.
basicConfig
(
level
=
logging
.
DEBUG
,
format
=
'
[%(asctime)s] [PID %(process)d] [%(filename)s] [%(module)s:%(funcName)s] [%(name)s] [%(levelname)s] %(message)s
'
,
datefmt
=
'
%Y-%m-%d_%H:%M
'
,
filename
=
filenameFullPath
,
filemode
=
"
a
"
)
#log info to stdout, display messages with different format than the file output
console
=
logging
.
StreamHandler
()
console
.
setLevel
(
logging
.
INFO
)
formatter
=
logging
.
Formatter
(
"
%(asctime)s > %(message)s
"
)
console
.
setFormatter
(
formatter
)
logging
.
getLogger
(
""
).
addHandler
(
console
)
#
# -------------------------- class: FileMover --------------------------------------
#
class
FileMover
():
patterns
=
[
"
*
"
]
fileList_newFiles
=
list
()
fileCount_newFiles
=
0
zmqContext
=
None
messageSocket
=
None
# to receiver fileMove-jobs as json-encoded dictionary
dataSocket
=
None
# to send fileObject as multipart message
bindingIpForSocket
=
None
zqmFileEventServerIp
=
"
127.0.0.1
"
# serverIp for incoming messages
tcpPort_messageStream
=
"
6060
"
dataStreamIp
=
"
127.0.0.1
"
# ip of dataStream-socket to push new files to
dataStreamPort
=
"
6061
"
# port number of dataStream-socket to push new files to
zmqCleanerIp
=
"
127.0.0.1
"
# zmq pull endpoint, responsable to delete files
zmqCleanerPort
=
"
6062
"
# zmq pull endpoint, responsable to delete files
fileWaitTimeInMs
=
None
fileMaxWaitTimeInMs
=
None
currentZmqDataStreamSocketListIndex
=
None
# Index-Number of a socket used to send datafiles to
logfileFullPath
=
None
chunkSize
=
None
def
process
(
self
):
try
:
self
.
startReceiving
()
except
KeyboardInterrupt
:
logging
.
debug
(
"
KeyboardInterrupt detected. Shutting down fileMover.
"
)
logging
.
info
(
"
Shutting down fileMover as KeyboardInterrupt was detected.
"
)
self
.
zmqContext
.
destroy
()
else
:
logging
.
error
(
"
Unknown Error. Quitting.
"
)
logging
.
info
(
"
Stopping fileMover due to unknown error condition.
"
)
def
__init__
(
self
,
bindingIpForSocket
,
bindingPortForSocket
,
dataStreamIp
,
dataStreamPort
,
parallelDataStreams
,
logfileFullPath
,
chunkSize
,
zmqCleanerIp
,
zmqCleanerPort
,
fileWaitTimeInMs
,
fileMaxWaitTimeInMs
):
logging
.
info
(
"
registering zmq global context
"
)
#create zmq context
zmqContext
=
zmq
.
Context
()
self
.
zmqContext
=
zmqContext
self
.
bindingIpForSocket
=
bindingIpForSocket
self
.
tcpPort_messageStream
=
bindingPortForSocket
self
.
dataStreamIp
=
dataStreamIp
self
.
dataStreamPort
=
dataStreamPort
self
.
parallelDataStreams
=
parallelDataStreams
self
.
logfileFullPath
=
logfileFullPath
self
.
chunkSize
=
chunkSize
self
.
zmqCleanerIp
=
zmqCleanerIp
self
.
zmqCleanerPort
=
zmqCleanerPort
self
.
fileWaitTimeInMs
=
fileWaitTimeInMs
self
.
fileMaxWaitTimeInMs
=
fileMaxWaitTimeInMs
#create zmq sockets. one for incoming file events, one for passing fileObjects to
self
.
messageSocket
=
self
.
getZmqSocket_Pull
(
self
.
zmqContext
)
self
.
dataSocket
=
self
.
getZmqSocket_Push
(
self
.
zmqContext
)
def
getFileWaitTimeInMs
(
self
):
return
self
.
fileWaitTimeInMs
def
getFileMaxWaitTimeInMs
(
self
):
return
self
.
fileMaxWaitTimeInMs
def
startReceiving
(
self
):
#create socket
zmqContext
=
self
.
zmqContext
zmqSocketForNewFileEvents
=
self
.
createPullSocket
()
logging
.
debug
(
"
new message-socket crated for: new file events.
"
)
parallelDataStreams
=
int
(
self
.
parallelDataStreams
)
logging
.
debug
(
"
new message-socket crated for: passing file objects.
"
)
incomingMessageCounter
=
0
#setting up router for load-balancing worker-threads.
#each worker-thread will handle a file event
routerSocket
=
self
.
zmqContext
.
socket
(
zmq
.
ROUTER
)
routerSocket
.
bind
(
"
tcp://127.0.0.1:50000
"
)
logging
.
debug
(
"
routerSocket started for
'
tcp://127.0.0.1:50000
'"
)
#start worker-threads. each will have its own PushSocket.
workerThreadList
=
list
()
numberOfWorkerThreads
=
parallelDataStreams
fileWaitTimeInMs
=
self
.
getFileWaitTimeInMs
()
fileMaxWaitTimeInMs
=
self
.
getFileMaxWaitTimeInMs
()
for
threadNumber
in
range
(
numberOfWorkerThreads
):
logging
.
debug
(
"
instantiate new workerProcess (nr
"
+
str
(
threadNumber
))
newWorkerThread
=
Process
(
target
=
WorkerProcess
,
args
=
(
threadNumber
,
self
.
dataStreamIp
,
self
.
dataStreamPort
,
logfileFullPath
,
self
.
chunkSize
,
zmqCleanerIp
,
zmqCleanerPort
,
fileWaitTimeInMs
,
fileMaxWaitTimeInMs
))
workerThreadList
.
append
(
newWorkerThread
)
logging
.
debug
(
"
start worker process nr
"
+
str
(
threadNumber
))
newWorkerThread
.
start
()
#run loop, and wait for incoming messages
continueReceiving
=
True
logging
.
debug
(
"
waiting for new fileEvent-messages
"
)
while
continueReceiving
:
try
:
incomingMessage
=
zmqSocketForNewFileEvents
.
recv
()
logging
.
debug
(
"
new fileEvent-message received.
"
)
logging
.
debug
(
"
message content:
"
+
str
(
incomingMessage
))
incomingMessageCounter
+=
1
logging
.
debug
(
"
processFileEvent...
"
)
self
.
processFileEvent
(
incomingMessage
,
routerSocket
)
#TODO refactor as separate process to emphasize unblocking
logging
.
debug
(
"
processFileEvent...done
"
)
except
Exception
,
e
:
print
"
exception
"
logging
.
error
(
"
Failed to receive new fileEvent-message.
"
)
logging
.
error
(
sys
.
exc_info
())
#TODO might using a error-count and threashold when to stop receiving, e.g. after 100 misses?
# continueReceiving = False
print
"
shutting down fileEvent-receiver...
"
try
:
logging
.
debug
(
"
shutting down zeromq...
"
)
self
.
stopReceiving
(
zmqSocketForNewFileEvents
,
zmqContext
)
logging
.
debug
(
"
shutting down zeromq...done.
"
)
except
:
logging
.
error
(
sys
.
exc_info
())
logging
.
error
(
"
shutting down zeromq...failed.
"
)
def
routeFileEventToWorkerThread
(
self
,
fileEventMessage
,
routerSocket
):
# LRU worker is next waiting in the queue
logging
.
debug
(
"
waiting for available workerThread.
"
)
# address == "worker-0"
# empty == ""
# ready == "READY"
address
,
empty
,
ready
=
routerSocket
.
recv_multipart
()
logging
.
debug
(
"
available workerThread detected.
"
)
logging
.
debug
(
"
passing job to workerThread...
"
)
routerSocket
.
send_multipart
([
address
,
b
''
,
fileEventMessage
,
])
logging
.
debug
(
"
passing job to workerThread...done.
"
)
def
processFileEvent
(
self
,
fileEventMessage
,
routerSocket
):
self
.
routeFileEventToWorkerThread
(
fileEventMessage
,
routerSocket
)
def
stopReceiving
(
self
,
zmqSocket
,
msgContext
):
try
:
logging
.
debug
(
"
closing zmqSocket...
"
)
zmqSocket
.
close
()
logging
.
debug
(
"
closing zmqSocket...done.
"
)
except
:
logging
.
debug
(
"
closing zmqSocket...failed.
"
)
logging
.
error
(
sys
.
exc_info
())
try
:
logging
.
debug
(
"
closing zmqContext...
"
)
msgContext
.
destroy
()
logging
.
debug
(
"
closing zmqContext...done.
"
)
except
:
logging
.
debug
(
"
closing zmqContext...failed.
"
)
logging
.
error
(
sys
.
exc_info
())
def
getZmqSocket_Pull
(
self
,
context
):
pattern_pull
=
zmq
.
PULL
assert
isinstance
(
context
,
zmq
.
sugar
.
context
.
Context
)
socket
=
context
.
socket
(
pattern_pull
)
return
socket
def
getZmqSocket_Push
(
self
,
context
):
pattern
=
zmq
.
PUSH
assert
isinstance
(
context
,
zmq
.
sugar
.
context
.
Context
)
socket
=
context
.
socket
(
pattern
)
return
socket
def
createPullSocket
(
self
):
#get default message-socket
socket
=
self
.
messageSocket
logging
.
info
(
"
binding to message socket: tcp://
"
+
self
.
bindingIpForSocket
+
"
:%s
"
%
self
.
tcpPort_messageStream
)
socket
.
bind
(
'
tcp://
'
+
self
.
bindingIpForSocket
+
'
:%s
'
%
self
.
tcpPort_messageStream
)
return
socket
class
Cleaner
():
"""
* received cleaning jobs via zeromq,
such as removing a file
* Does regular checks on the watched directory,
such as
- deleting files which have been successfully send
to target but still remain in the watched directory
- poll the watched directory and reissue new files
to fileMover which have not been detected yet
"""
logfileFullPath
=
None
bindingPortForSocket
=
None
bindingIpForSocket
=
None
zmqContextForCleaner
=
None
def
__init__
(
self
,
logfilePath
,
bindingIp
=
"
127.0.0.1
"
,
bindingPort
=
"
6062
"
,
verbose
=
False
):
self
.
bindingPortForSocket
=
bindingPort
self
.
bindingIpForSocket
=
bindingIp
self
.
initLogging
(
logfilePath
,
verbose
)
log
=
self
.
getLogger
()
try
:
self
.
process
()
except
KeyboardInterrupt
:
log
.
debug
(
"
KeyboardInterrupt detected. Shutting down workerProcess.
"
)
self
.
zmqContextForCleaner
.
destroy
()
else
:
trace
=
traceback
.
format_exc
()
log
.
error
(
"
Stopping cleanerProcess due to unknown error condition.
"
)
log
.
debug
(
"
Error was:
"
+
str
(
trace
))
def
getLogger
(
self
):
logger
=
logging
.
getLogger
(
"
cleaner
"
)
return
logger
def
initLogging
(
self
,
logfilePath
,
verbose
):
#@see https://docs.python.org/2/howto/logging-cookbook.html
logfilePathFull
=
os
.
path
.
join
(
logfilePath
,
"
cleaner.log
"
)
logger
=
logging
.
getLogger
(
"
cleaner
"
)
#more detailed logging if verbose-option has been set
loggingLevel
=
logging
.
INFO
if
verbose
:
loggingLevel
=
logging
.
DEBUG
#log everything to file
fileHandler
=
logging
.
FileHandler
(
filename
=
logfilePathFull
,
mode
=
"
a
"
)
fileHandlerFormat
=
logging
.
Formatter
(
datefmt
=
'
%Y-%m-%d_%H:%M:%S
'
,
fmt
=
'
[%(asctime)s] [PID %(process)d] [%(filename)s] [%(module)s:%(funcName)s] [%(name)s] [%(levelname)s] %(message)s
'
)
fileHandler
.
setFormatter
(
fileHandlerFormat
)
fileHandler
.
setLevel
(
loggingLevel
)
logger
.
addHandler
(
fileHandler
)
def
process
(
self
):
processingJobs
=
True
log
=
self
.
getLogger
()
#create zmq context
zmqContextForCleaner
=
zmq
.
Context
()
self
.
zmqContextForCleaner
=
zmqContextForCleaner
#bind to local port
zmqJobSocket
=
zmqContextForCleaner
.
socket
(
zmq
.
PULL
)
zmqJobSocket
.
bind
(
'
tcp://
'
+
self
.
bindingIpForSocket
+
'
:%s
'
%
self
.
bindingPortForSocket
)
#processing messaging
while
processingJobs
:
#waiting for new jobs
workload
=
zmqJobSocket
.
recv
()
#transform to dictionary
try
:
workloadDict
=
json
.
loads
(
str
(
workload
))
except
:
errorMessage
=
"
invalid job received. skipping job
"
log
.
error
(
errorMessage
)
log
.
debug
(
"
workload=
"
+
str
(
workload
))
continue
#extract fileEvent metadata
try
:
#TODO validate fileEventMessageDict dict
filename
=
workloadDict
[
"
filename
"
]
sourcePath
=
workloadDict
[
"
sourcePath
"
]
relativeParent
=
workloadDict
[
"
relativeParent
"
]
# filesize = workloadDict["filesize"]
except
Exception
,
e
:
errorMessage
=
"
Invalid fileEvent message received.
"
log
.
error
(
errorMessage
)
log
.
debug
(
"
Error was:
"
+
str
(
e
))
log
.
debug
(
"
workloadDict=
"
+
str
(
workloadDict
))
#skip all further instructions and continue with next iteration
continue
#removing source file
sourceFilepath
=
None
try
:
logging
.
debug
(
"
removing source file...
"
)
#generate target filepath
sourceFilepath
=
os
.
path
.
join
(
sourcePath
,
filename
)
self
.
removeFile
(
sourceFilepath
)
# #show filesystem statistics
# try:
# self.showFilesystemStatistics(sourcePath)
# except Exception, f:
# logging.warning("Unable to get filesystem statistics")
# logging.debug("Error was: " + str(f))
log
.
debug
(
"
file removed:
"
+
str
(
sourceFilepath
))
log
.
debug
(
"
removing source file...success.
"
)
except
Exception
,
e
:
errorMessage
=
"
Unable to remove source file.
"
log
.
error
(
errorMessage
)
trace
=
traceback
.
format_exc
()
log
.
error
(
"
Error was:
"
+
str
(
trace
))
log
.
debug
(
"
sourceFilepath=
"
+
str
(
sourceFilepath
))
log
.
debug
(
"
removing source file...failed.
"
)
#skip all further instructions and continue with next iteration
continue
def
removeFile
(
self
,
filepath
):
log
=
self
.
getLogger
()
maxAttemptsToRemoveFile
=
2
waitTimeBetweenAttemptsInMs
=
500
iterationCount
=
0
log
.
info
(
"
Removing file
'"
+
str
(
filepath
)
+
"'
(attempt
"
+
str
(
iterationCount
)
+
"
)...
"
)
fileWasRemoved
=
False
while
iterationCount
<=
maxAttemptsToRemoveFile
and
not
fileWasRemoved
:
iterationCount
+=
1
try
:
os
.
remove
(
filepath
)
fileWasRemoved
=
True
log
.
debug
(
"
Removing file
'"
+
str
(
filepath
)
+
"'
(attempt
"
+
str
(
iterationCount
)
+
"
)...success.
"
)
except
Exception
,
e
:
trace
=
traceback
.
format_exc
()
warningMessage
=
"
Unable to remove file {FILE}.
"
.
format
(
FILE
=
str
(
filepath
))
log
.
warning
(
warningMessage
)
log
.
debug
(
"
trace=
"
+
str
(
trace
))
log
.
warning
(
"
will try again in {MS}ms.
"
.
format
(
MS
=
str
(
waitTimeBetweenAttemptsInMs
)))
if
not
fileWasRemoved
:
log
.
error
(
"
Removing file
'"
+
str
(
filepath
)
+
"'
(attempt
"
+
str
(
iterationCount
)
+
"
)...FAILED.
"
)
raise
Exception
(
"
maxAttemptsToRemoveFile reached (value={ATTEMPT}). Unable to remove file
'
{FILE}
'
.
"
.
format
(
ATTEMPT
=
str
(
iterationCount
),
FILE
=
filepath
))
def
getNewFiles
(
self
):
"""
returns a list with all files currently available
in the watched directory
"""
raise
NotImplementedError
def
sendNewFileEventToFileMover
(
self
):
"""
sends a new-file-event to the fileMover-messageQueue
"""
raise
NotImplementedError
def
argumentParsing
():
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"
--logfilePath
"
,
type
=
str
,
help
=
"
path where logfile will be created
"
,
default
=
"
/tmp/log/
"
)
parser
.
add_argument
(
"
--logfileName
"
,
type
=
str
,
help
=
"
filename used for logging
"
,
default
=
"
fileMover.log
"
)
parser
.
add_argument
(
"
--bindingIpForSocket
"
,
type
=
str
,
help
=
"
local ip to bind to
"
,
default
=
"
127.0.0.1
"
)
parser
.
add_argument
(
"
--bindingPortForSocket
"
,
type
=
str
,
help
=
"
local port to bind to
"
,
default
=
"
6060
"
)
parser
.
add_argument
(
"
--dataStreamIp
"
,
type
=
str
,
help
=
"
ip of dataStream-socket to push new files to
"
,
default
=
"
127.0.0.1
"
)
parser
.
add_argument
(
"
--dataStreamPort
"
,
type
=
str
,
help
=
"
port number of dataStream-socket to push new files to
"
,
default
=
"
6061
"
)
parser
.
add_argument
(
"
--zmqCleanerIp
"
,
type
=
str
,
help
=
"
zmq-pull-socket which deletes given files
"
,
default
=
"
127.0.0.1
"
)
parser
.
add_argument
(
"
--zmqCleanerPort
"
,
type
=
str
,
help
=
"
zmq-pull-socket which deletes given files
"
,
default
=
"
6063
"
)
parser
.
add_argument
(
"
--parallelDataStreams
"
,
type
=
int
,
help
=
"
number of parallel data streams. default is 1
"
,
default
=
"
1
"
)
parser
.
add_argument
(
"
--chunkSize
"
,
type
=
int
,
help
=
"
chunk size of file-parts getting send via zmq
"
,
default
=
DEFAULT_CHUNK_SIZE
)
parser
.
add_argument
(
"
--verbose
"
,
help
=
"
more verbose output
"
,
action
=
"
store_true
"
)
parser
.
add_argument
(
"
--fileWaitTimeInMs
"
,
type
=
int
,
help
=
argparse
.
SUPPRESS
,
default
=
2000
)
parser
.
add_argument
(
"
--fileMaxWaitTimeInMs
"
,
type
=
int
,
help
=
argparse
.
SUPPRESS
,
default
=
10000
)
arguments
=
parser
.
parse_args
()
return
arguments
def
checkFolderForExistance
(
watchFolderPath
):
"""
abort if watch-folder does not exist
:return:
"""
#check folder path for existance. exits if it does not exist
if
not
os
.
path
.
exists
(
watchFolderPath
):
logging
.
error
(
"
WatchFolder
'
%s
'
does not exist. Abort.
"
%
str
(
watchFolderPath
))
sys
.
exit
(
1
)
def
checkLogfileFolder
(
logfilePath
):
"""
abort if watch-folder does not exist
:return:
"""
#check folder path for existance. exits if it does not exist
if
not
os
.
path
.
exists
(
logfilePath
):
logging
.
error
(
"
LogfileFilder
'
%s
'
does not exist. Abort.
"
%
str
(
logfilePath
))
sys
.
exit
(
1
)
def
initLogging
(
filenameFullPath
,
verbose
):
#@see https://docs.python.org/2/howto/logging-cookbook.html
#more detailed logging if verbose-option has been set
loggingLevel
=
logging
.
INFO
if
verbose
:
loggingLevel
=
logging
.
DEBUG
#log everything to file
logging
.
basicConfig
(
level
=
loggingLevel
,
format
=
'
[%(asctime)s] [PID %(process)d] [%(filename)s] [%(module)s:%(funcName)s] [%(name)s] [%(levelname)s] %(message)s
'
,
datefmt
=
'
%Y-%m-%d_%H:%M:%S
'
,
filename
=
filenameFullPath
,
filemode
=
"
a
"
)
#log info to stdout, display messages with different format than the file output
console
=
logging
.
StreamHandler
()
console
.
setLevel
(
logging
.
WARNING
)
formatter
=
logging
.
Formatter
(
"
%(asctime)s > %(message)s
"
)
console
.
setFormatter
(
formatter
)
logging
.
getLogger
(
""
).
addHandler
(
console
)
if
__name__
==
'
__main__
'
:
freeze_support
()
#see https://docs.python.org/2/library/multiprocessing.html#windows
arguments
=
argumentParsing
()
logFile
=
str
(
arguments
.
logfilePath
)
+
"
/
"
+
str
(
arguments
.
logfileName
)
bindingIpForSocket
=
str
(
arguments
.
bindingIpForSocket
)
bindingPortForSocket
=
str
(
arguments
.
bindingPortForSocket
)
dataStreamIp
=
str
(
arguments
.
dataStreamIp
)
dataStreamPort
=
str
(
arguments
.
dataStreamPort
)
logfilePath
=
str
(
arguments
.
logfilePath
)
logfileName
=
str
(
arguments
.
logfileName
)
parallelDataStreams
=
str
(
arguments
.
parallelDataStreams
)
zmqCleanerIp
=
str
(
arguments
.
zmqCleanerIp
)
zmqCleanerPort
=
str
(
arguments
.
zmqCleanerPort
)
chunkSize
=
arguments
.
chunkSize
verbose
=
arguments
.
verbose
logfileFullPath
=
os
.
path
.
join
(
logfilePath
,
logfileName
)
fileWaitTimeInMs
=
float
(
arguments
.
fileWaitTimeInMs
)
fileMaxWaitTimeInMs
=
float
(
arguments
.
fileMaxWaitTimeInMs
)
#enable logging
initLogging
(
logfileFullPath
,
verbose
)
cleanerThread
=
Process
(
target
=
Cleaner
,
args
=
(
logfilePath
,
zmqCleanerIp
,
zmqCleanerPort
))
logging
.
debug
(
"
cleaner thread started
"
)
cleanerThread
.
start
()
#start new fileMover
# try:
fileMover
=
FileMover
(
bindingIpForSocket
,
bindingPortForSocket
,
dataStreamIp
,
dataStreamPort
,
parallelDataStreams
,
logfileFullPath
,
chunkSize
,
zmqCleanerIp
,
zmqCleanerPort
,
fileWaitTimeInMs
,
fileMaxWaitTimeInMs
)
fileMover
.
process
()
# except KeyboardInterrupt, ke:
# print "keyboardInterrupt detected."
# except Exception, e:
# print "unknown exception detected."
This diff is collapsed.
Click to expand it.
ZeroMQTunnel/receiver.py
0 → 100644
+
416
−
0
View file @
cf3bfbee
__author__
=
'
Marco Strutz <marco.strutz@desy.de>
'
import
time
import
zmq
import
sys
import
random
import
json
import
argparse
import
logging
import
errno
import
os
import
traceback
class
FileReceiver
:
globalZmqContext
=
None
outputDir
=
None
bindingPortForDataStream
=
None
zqmDataStreamIp
=
None
bindingIpForDataStream
=
None
zqmFileserverIp
=
None
def
__init__
(
self
,
outputDir
,
bindingPortForDataStream
,
zqmFileserverIp
):
self
.
outputDir
=
outputDir
self
.
bindingPortForDataStream
=
bindingPortForDataStream
self
.
zqmFileserverIp
=
zqmFileserverIp
# "global" in Python is per-module !
global
globalZmqContext
self
.
globalZmqContext
=
zmq
.
Context
()
try
:
logging
.
info
(
"
Start receiving new files
"
)
self
.
startReceiving
()
logging
.
info
(
"
Stopped receiving.
"
)
except
Exception
,
e
:
logging
.
error
(
"
Unknown error while receiving files. Need to abort.
"
)
logging
.
debug
(
"
Error was:
"
+
str
(
e
))
except
:
trace
=
traceback
.
format_exc
()
logging
.
info
(
"
Unkown error state. Shutting down...
"
)
logging
.
debug
(
"
Error was:
"
+
str
(
trace
))
self
.
globalZmqContext
.
destroy
()
logging
.
info
(
"
Quitting.
"
)
def
receiveMessage
(
self
,
socket
):
assert
isinstance
(
socket
,
zmq
.
sugar
.
socket
.
Socket
)
logging
.
debug
(
"
receiving messages...
"
)
# message = socket.recv()
# while True:
message
=
socket
.
recv_multipart
()
return
message
def
getZmqContext
(
self
):
#get reference for global context-var
globalZmqContext
=
self
.
globalZmqContext
return
globalZmqContext
def
getZmqSocket_Pull
(
self
,
context
):
pattern_pull
=
zmq
.
PULL
assert
isinstance
(
context
,
zmq
.
sugar
.
context
.
Context
)
socket
=
context
.
socket
(
pattern_pull
)
return
socket
def
createPullSocket
(
self
,
context
):
assert
isinstance
(
context
,
zmq
.
sugar
.
context
.
Context
)
socket
=
self
.
getZmqSocket_Pull
(
context
)
logging
.
info
(
"
binding to data socket: tcp://
"
+
self
.
zqmFileserverIp
+
"
:%s
"
%
self
.
bindingPortForDataStream
)
socket
.
bind
(
'
tcp://
'
+
self
.
zqmFileserverIp
+
'
:%s
'
%
self
.
bindingPortForDataStream
)
return
socket
def
startReceiving
(
self
):
#create pull socket
try
:
logging
.
info
(
"
creating local pullSocket for incoming files...
"
)
zmqContext
=
self
.
getZmqContext
()
zmqSocket
=
self
.
createPullSocket
(
zmqContext
)
logging
.
info
(
"
creating local pullSocket for incoming files...done.
"
)
except
Exception
,
e
:
errorMessage
=
"
Unable to create zeromq context.
"
logging
.
error
(
errorMessage
)
logging
.
debug
(
"
Error was:
"
+
str
(
e
))
logging
.
info
(
"
creating local pullSocket for incoming files...failed.
"
)
raise
Exception
(
e
)
#run loop, and wait for incoming messages
continueStreaming
=
True
loopCounter
=
0
#counter of total received messages
continueReceiving
=
True
#receiving will stop if value gets False
logging
.
debug
(
"
Waiting for new messages...
"
)
while
continueReceiving
:
try
:
# multipartMessage = zmqSocket.recv_multipart()
# logging.info("New message received.")
# logging.debug("message-type : " + str(type(multipartMessage)))
# logging.debug("message-length: " + str(len(multipartMessage)))
# loopCounter+=1
#save all chunks to file
while
True
:
multipartMessage
=
zmqSocket
.
recv_multipart
()
#append to file
try
:
logging
.
debug
(
"
append to file based on multipart-message...
"
)
#TODO: save message to file using a thread (avoids blocking)
#TODO: instead of open/close file for each chunk recyle the file-descriptor for all chunks opened
self
.
appendChunksToFileFromMultipartMessage
(
multipartMessage
)
logging
.
debug
(
"
append to file based on multipart-message...success.
"
)
except
Exception
,
e
:
errorMessage
=
"
Unable to append multipart-content to file.
"
logging
.
error
(
errorMessage
)
logging
.
debug
(
"
Error was:
"
+
str
(
e
))
logging
.
debug
(
"
append to file based on multipart-message...failed.
"
)
except
:
errorMessage
=
"
Unable to append multipart-content to file. Unknown Error.
"
logging
.
error
(
errorMessage
)
logging
.
debug
(
"
append to file based on multipart-message...failed.
"
)
if
len
(
multipartMessage
[
1
])
==
0
:
#indicated end of file. closing file and leave loop
logging
.
debug
(
"
last file-chunk received. stop appending.
"
)
break
payloadMetadata
=
str
(
multipartMessage
[
0
])
payloadMetadataDict
=
json
.
loads
(
payloadMetadata
)
filename
=
self
.
generateTargetFilepath
(
payloadMetadataDict
)
logging
.
info
(
"
New file received and saved:
"
+
str
(
filename
))
# logging.debug("message-type : " + str(type(multipartMessage)))
# logging.debug("message-length: " + str(len(multipartMessage)))
loopCounter
+=
1
except
KeyboardInterrupt
:
logging
.
debug
(
"
Keyboard interrupt detected. Stop receiving.
"
)
break
except
:
logging
.
error
(
"
receive message...failed.
"
)
logging
.
error
(
sys
.
exc_info
())
continueReceiving
=
False
# #create new file object
# try:
# logging.debug("create new file based on multipart-message...")
# #TODO: save message to file using a thread (avoids blocking)
# self.saveFileFromMultipartMessage(multipartMessage)
# logging.debug("create new file based on multipart-message...success.")
# except Exception, e:
# errorMessage = "Unable to save multipart-content as file."
# logging.error(errorMessage)
# logging.debug("Error was: " + str(e))
# logging.debug("create new file based on multipart-message...failed.")
# except:
# errorMessage = "Unable to save multipart-content as file. Unknown Error."
# logging.error(errorMessage)
# logging.debug("create new file based on multipart-message...failed.")
logging
.
info
(
"
shutting down receiver...
"
)
try
:
logging
.
debug
(
"
shutting down zeromq...
"
)
self
.
stopReceiving
(
zmqSocket
,
zmqContext
)
logging
.
debug
(
"
shutting down zeromq...done.
"
)
except
:
logging
.
error
(
sys
.
exc_info
())
logging
.
error
(
"
shutting down zeromq...failed.
"
)
def
saveFileFromMultipartMessage
(
self
,
multipartMessage
):
#extract multipart message
try
:
configDictJson
=
multipartMessage
[
0
]
except
:
logging
.
error
(
"
an empty config was transferred for multipartMessage
"
)
try
:
chunkCount
=
len
(
multipartMessage
)
-
1
#-1 as the first element keeps the dictionary/metadata
payload
=
multipartMessage
[
1
:]
except
:
logging
.
warning
(
"
an empty file was received within the multipart-message
"
)
payload
=
None
#TODO validate multipartMessage (like correct dict-values for metadata)
logging
.
debug
(
"
multipartMessage.metadata =
"
+
str
(
configDictJson
))
#extraction metadata from multipart-message
configDict
=
json
.
loads
(
configDictJson
)
targetFilename
=
configDict
[
"
filename
"
]
targetFilesize
=
configDict
[
"
filesize
"
]
targetRelativePath
=
configDict
[
"
relativeParent
"
]
#generate target filepath
targetPath
=
self
.
outputDir
+
"
/
"
+
targetRelativePath
targetFilepath
=
targetPath
+
"
/
"
+
targetFilename
logging
.
debug
(
"
new file is going to be created at:
"
+
targetFilepath
)
#write payload to file
try
:
newFile
=
open
(
targetFilepath
,
"
w
"
)
except
IOError
,
e
:
# errno.ENOENT == "No such file or directory"
if
e
.
errno
==
errno
.
ENOENT
:
#TODO create subdirectory first, then try to open the file again
try
:
os
.
makedirs
(
targetPath
)
newFile
=
open
(
targetFilepath
,
"
w
"
)
except
Exception
,
f
:
errorMessage
=
"
unable to save payload to file:
'"
+
targetFilepath
+
"'"
logging
.
error
(
errorMessage
)
logging
.
debug
(
"
Error was:
"
+
str
(
f
))
raise
Exception
(
errorMessage
)
except
Exception
,
e
:
logging
.
error
(
"
failed to save payload to file:
'"
+
targetFilepath
+
"'"
)
logging
.
debug
(
"
Error was:
"
+
str
(
e
))
logging
.
debug
(
"
ErrorTyp:
"
+
str
(
type
(
e
)))
logging
.
debug
(
"
e.errno =
"
+
str
(
e
.
errno
)
+
"
errno.EEXIST==
"
+
str
(
errno
.
EEXIST
))
#only write data if a payload exist
try
:
if
payload
!=
None
:
for
chunk
in
payload
:
newFile
.
write
(
chunk
)
newFile
.
close
()
except
Exception
,
e
:
errorMessage
=
"
unable to write data to file.
"
logging
.
error
(
errorMessage
)
logging
.
debug
(
"
Error was:
"
+
str
(
e
))
raise
Exception
(
errorMessage
)
def
generateTargetFilepath
(
self
,
configDict
):
"""
generates full path where target file will saved to.
"""
targetFilename
=
configDict
[
"
filename
"
]
targetRelativePath
=
configDict
[
"
relativeParent
"
]
if
targetRelativePath
is
''
or
targetRelativePath
is
None
:
targetPath
=
self
.
getOutputDir
()
else
:
targetPath
=
os
.
path
.
join
(
self
.
getOutputDir
(),
targetRelativePath
)
targetFilepath
=
os
.
path
.
join
(
targetPath
,
targetFilename
)
return
targetFilepath
def
getOutputDir
(
self
):
return
self
.
outputDir
def
generateTargetPath
(
self
,
configDict
):
"""
generates path where target file will saved to.
"""
targetRelativePath
=
configDict
[
"
relativeParent
"
]
outputDir
=
self
.
getOutputDir
()
targetPath
=
os
.
path
.
join
(
outputDir
,
targetRelativePath
)
return
targetPath
def
appendChunksToFileFromMultipartMessage
(
self
,
multipartMessage
):
#extract multipart message
try
:
configDictJson
=
multipartMessage
[
0
]
except
:
logging
.
error
(
"
an empty config was transferred for multipartMessage
"
)
try
:
chunkCount
=
len
(
multipartMessage
)
-
1
#-1 as the first element keeps the dictionary/metadata
payload
=
multipartMessage
[
1
:]
except
:
logging
.
warning
(
"
an empty file was received within the multipart-message
"
)
payload
=
None
#TODO validate multipartMessage (like correct dict-values for metadata)
logging
.
debug
(
"
multipartMessage.metadata =
"
+
str
(
configDictJson
))
#extraction metadata from multipart-message
configDict
=
json
.
loads
(
configDictJson
)
#generate target filepath
targetFilepath
=
self
.
generateTargetFilepath
(
configDict
)
logging
.
debug
(
"
new file is going to be created at:
"
+
targetFilepath
)
#append payload to file
try
:
newFile
=
open
(
targetFilepath
,
"
a
"
)
except
IOError
,
e
:
# errno.ENOENT == "No such file or directory"
if
e
.
errno
==
errno
.
ENOENT
:
#TODO create subdirectory first, then try to open the file again
try
:
targetPath
=
self
.
generateTargetPath
(
configDict
)
os
.
makedirs
(
targetPath
)
newFile
=
open
(
targetFilepath
,
"
w
"
)
logging
.
info
(
"
New target directory created:
"
+
str
(
targetPath
))
except
Exception
,
f
:
errorMessage
=
"
unable to save payload to file:
'"
+
targetFilepath
+
"'"
logging
.
error
(
errorMessage
)
logging
.
debug
(
"
Error was:
"
+
str
(
f
))
logging
.
debug
(
"
targetPath=
"
+
str
(
targetPath
))
raise
Exception
(
errorMessage
)
except
Exception
,
e
:
logging
.
error
(
"
failed to append payload to file:
'"
+
targetFilepath
+
"'"
)
logging
.
debug
(
"
Error was:
"
+
str
(
e
))
logging
.
debug
(
"
ErrorTyp:
"
+
str
(
type
(
e
)))
logging
.
debug
(
"
e.errno =
"
+
str
(
e
.
errno
)
+
"
errno.EEXIST==
"
+
str
(
errno
.
EEXIST
))
#only write data if a payload exist
try
:
if
payload
!=
None
:
for
chunk
in
payload
:
newFile
.
write
(
chunk
)
newFile
.
close
()
except
Exception
,
e
:
errorMessage
=
"
unable to append data to file.
"
logging
.
error
(
errorMessage
)
logging
.
debug
(
"
Error was:
"
+
str
(
e
))
raise
Exception
(
errorMessage
)
def
stopReceiving
(
self
,
zmqSocket
,
msgContext
):
try
:
logging
.
debug
(
"
closing zmqSocket...
"
)
zmqSocket
.
close
()
logging
.
debug
(
"
closing zmqSocket...done.
"
)
except
:
logging
.
error
(
"
closing zmqSocket...failed.
"
)
logging
.
error
(
sys
.
exc_info
())
try
:
logging
.
debug
(
"
closing zmqContext...
"
)
msgContext
.
destroy
()
logging
.
debug
(
"
closing zmqContext...done.
"
)
except
:
logging
.
error
(
"
closing zmqContext...failed.
"
)
logging
.
error
(
sys
.
exc_info
())
def
argumentParsing
():
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"
--outputDir
"
,
type
=
str
,
help
=
"
where incoming data will be stored to
"
,
default
=
"
/tmp/watchdog/data_mirror/
"
)
parser
.
add_argument
(
"
--tcpPortDataStream
"
,
type
=
int
,
help
=
"
tcp port of data pipe
"
,
default
=
6061
)
parser
.
add_argument
(
"
--logfile
"
,
type
=
str
,
help
=
"
file used for logging
"
,
default
=
"
/tmp/watchdog/fileReceiver.log
"
)
parser
.
add_argument
(
"
--bindingIpForDataStream
"
,
type
=
str
,
help
=
"
local ip to bind dataStream to
"
,
default
=
"
127.0.0.1
"
)
parser
.
add_argument
(
"
--verbose
"
,
help
=
"
more verbose output
"
,
action
=
"
store_true
"
)
arguments
=
parser
.
parse_args
()
# TODO: check folder-directory for existance
return
arguments
def
initLogging
(
filenameFullPath
,
verbose
):
#@see https://docs.python.org/2/howto/logging-cookbook.html
#more detailed logging if verbose-option has been set
loggingLevel
=
logging
.
INFO
if
verbose
:
loggingLevel
=
logging
.
DEBUG
#log everything to file
logging
.
basicConfig
(
level
=
loggingLevel
,
format
=
'
[%(asctime)s] [PID %(process)d] [%(filename)s] [%(module)s:%(funcName)s] [%(name)s] [%(levelname)s] %(message)s
'
,
datefmt
=
'
%Y-%m-%d_%H:%M:%S
'
,
filename
=
filenameFullPath
,
filemode
=
"
a
"
)
#log info to stdout, display messages with different format than the file output
console
=
logging
.
StreamHandler
()
console
.
setLevel
(
logging
.
WARNING
)
formatter
=
logging
.
Formatter
(
"
%(asctime)s > %(message)s
"
)
console
.
setFormatter
(
formatter
)
logging
.
getLogger
(
""
).
addHandler
(
console
)
if
__name__
==
"
__main__
"
:
#argument parsing
arguments
=
argumentParsing
()
outputDir
=
arguments
.
outputDir
verbose
=
arguments
.
verbose
zqmDataStreamPort
=
str
(
arguments
.
tcpPortDataStream
)
zqmDataStreamIp
=
str
(
arguments
.
bindingIpForDataStream
)
logFile
=
arguments
.
logfile
logfileFilePath
=
arguments
.
logfile
#enable logging
initLogging
(
logfileFilePath
,
verbose
)
#start file receiver
myWorker
=
FileReceiver
(
outputDir
,
zqmDataStreamPort
,
zqmDataStreamIp
)
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment