"""
File: GdbDataRemover.py

Command-line utility that removes edges and/or vertices from a Gremlin
graph database in batches.

Authors:
    Mobing

History:
    2019/7/1 - initial release
"""
  7. from __future__ import print_function
  8. import argparse
  9. from gremlin_python.driver import client
  10. from gremlin_python.driver.resultset import ResultSet
  11. class PColors:
  12. RED = '\033[91m'
  13. GREEN = '\033[92m'
  14. YELLOW = '\033[0;32m'
  15. BLUE = '\033[94m'
  16. ENDC = '\033[0m'
  17. def __init__(self):
  18. pass
  19. class PrintUtil:
  20. def __init__(self):
  21. pass
  22. @staticmethod
  23. def rprint(msg):
  24. print(PColors.RED + msg + PColors.ENDC)
  25. @staticmethod
  26. def yprint(msg, new_line=True):
  27. print(PColors.YELLOW + msg + PColors.ENDC, end="\n" if new_line else "\r")
  28. class GdbDataRemover:
  29. def __init__(self, gdb_client, limit):
  30. self.gdb_client = gdb_client
  31. self.limit = limit
  32. def drop(self, label, drop_edge_only):
  33. if label is None:
  34. self.__drop_all(True)
  35. if not drop_edge_only:
  36. self.__drop_all(False)
  37. else:
  38. self.__drop_by_label(label, drop_edge_only)
  39. def __drop_all(self, drop_edge_only):
  40. marker = "E" if drop_edge_only else "V"
  41. cnt_dsl = "g.%s().count()" % marker
  42. cnt_params = {}
  43. drop_dsl = "g.%s().limit(limit).sideEffect(drop()).count()" % marker
  44. drop_params = {
  45. "limit": self.limit,
  46. }
  47. print_marker = "edges" if drop_edge_only else "vertices"
  48. PrintUtil.rprint("Start to remove all %s: " % print_marker)
  49. self.__generic_batch_drop(cnt_dsl, cnt_params,
  50. drop_dsl, drop_params)
  51. def __drop_by_label(self, label, drop_edge_only):
  52. marker = "E" if drop_edge_only else "V"
  53. label_cnt_dsl = "g.%s().hasLabel(drop_label).count()" % marker
  54. label_cnt_params = {
  55. "drop_label": label,
  56. }
  57. label_drop_dsl = "g.%s().hasLabel(drop_label).limit(limit).sideEffect(drop()).count()" % marker
  58. label_drop_params = {
  59. "drop_label": label,
  60. "limit": self.limit,
  61. }
  62. print_marker = "edges" if drop_edge_only else "vertices"
  63. PrintUtil.rprint("Start to remove all %s with label %s: " % (print_marker, label))
  64. self.__generic_batch_drop(label_cnt_dsl, label_cnt_params,
  65. label_drop_dsl, label_drop_params)
  66. def __generic_batch_drop(self, cnt_dsl, cnt_params, drop_dsl, drop_params):
  67. cnt_result = self.gdb_client.submit(cnt_dsl, cnt_params)
  68. cnt = cnt_result.one()[0]
  69. if 0 == cnt:
  70. PrintUtil.rprint("total cnt: %d, no need to drop" % cnt)
  71. return 0
  72. else:
  73. PrintUtil.rprint("total cnt: %d, begin to drop" % cnt)
  74. total_dropped_cnt = 0
  75. while cnt > total_dropped_cnt:
  76. curr_drop_result = self.gdb_client.submit(drop_dsl, drop_params) # type: ResultSet
  77. curr_dropped_cnt = curr_drop_result.one()[0]
  78. total_dropped_cnt += curr_dropped_cnt
  79. PrintUtil.yprint("%d" % total_dropped_cnt, False)
  80. if 0 == curr_dropped_cnt or self.limit < curr_dropped_cnt:
  81. break
  82. PrintUtil.yprint("")
  83. return total_dropped_cnt
  84. def main():
  85. parser = argparse.ArgumentParser()
  86. parser.add_argument('--host', dest="host", type=str, required=True)
  87. parser.add_argument('--port', dest="port", type=int, default=8182)
  88. parser.add_argument('--username', dest="username", type=str, required=True)
  89. parser.add_argument('--password', dest="password", type=str, required=True)
  90. parser.add_argument('--limit', dest="limit", type=int, default=500)
  91. parser.add_argument('--label', dest="label", type=str, default=None, help="drop element with specified label")
  92. parser.add_argument('--edge', dest="drop_edge_only", action="store_true", help="only drop edge")
  93. args = parser.parse_args()
  94. print(args)
  95. # gdb_client = client.Client('ws://%s:%d/gremlin' % (args.host, args.port),
  96. # 'g', username=args.username, password=args.password)
  97. gdb_client = client.Client('ws://gds-bp130d7rgd9m7n61150070pub.graphdb.rds.aliyuncs.com:3734/gremlin', 'g', username="bxkc", password="k0n1bxkc!0K^Em%j")
  98. gdb_data_remover = GdbDataRemover(gdb_client, args.limit)
  99. gdb_data_remover.drop(args.label, args.drop_edge_only)
  100. if __name__ == '__main__':
  101. main()