qqubb committed on
Commit
a0e7778
·
1 Parent(s): 03820e0

initial code to handle multiple cards

Browse files
__pycache__/compliance_analysis.cpython-310.pyc CHANGED
Binary files a/__pycache__/compliance_analysis.cpython-310.pyc and b/__pycache__/compliance_analysis.cpython-310.pyc differ
 
__pycache__/utils.cpython-310.pyc CHANGED
Binary files a/__pycache__/utils.cpython-310.pyc and b/__pycache__/utils.cpython-310.pyc differ
 
compliance_analysis.py CHANGED
@@ -1,5 +1,5 @@
1
  import yaml
2
- from utils import set_type, set_operator_role_and_location, set_eu_market_status, check_within_scope_cc, check_within_scope_act
3
 
4
  # Create some variables we will use throughout our analysis
5
 
@@ -31,22 +31,40 @@ dispositive_variables = {
31
 
32
  def check_overall_compliance(dispositive_variables, cc_files):
33
 
 
34
  # check intended purposes
35
- dispositive_variables = check_intended_purpose(dispositive_variables, cc_files)
36
 
37
  # for each model_cc and data_cc - run analysis with ref to project_cc
38
 
39
- dispositive_variables = run_compliance_analysis_on_data(dispositive_variables, data_cc_yaml)
40
- dispositive_variables = run_compliance_analysis_on_model(dispositive_variables, model_cc_yaml)
41
 
42
  dispositive_variables = run_compliance_analysis_on_project(dispositive_variables, project_cc_yaml)
43
 
44
  return dispositive_variables
45
 
46
- def run_compliance_analysis_on_project(dispositive_variables, project_cc_yaml):
47
-
48
- # Determine project type (AI system vs. GPAI model) as well as operator type. We will use these for different things.
49
- project_type = set_type(dispositive_variables, project_cc_yaml)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  set_operator_role_and_location(dispositive_variables, project_cc_yaml)
51
  set_eu_market_status(dispositive_variables, project_cc_yaml)
52
 
@@ -106,35 +124,35 @@ def run_compliance_analysis_on_project(dispositive_variables, project_cc_yaml):
106
 
107
  if gpai_model:
108
 
109
- # # If the project is a GPAI model with systematic risk, check that is has additionally met all the requirements for such systems:
110
 
111
- # if gpai_model_systematic_risk:
112
 
113
- # # Do this by examining the Project CC
114
 
115
- # for key, value in project_cc_yaml['gpai_obligations_for_systemic_risk_models']:
116
- # if not value:
117
- # msg = ("GPAI model with systematic risk fails the transparency requirements under Article 55.")
118
 
119
- # Do this by examining the Project CC
120
 
121
  for key, value in project_cc_yaml['gpai_model_obligations']:
122
  if not value:
123
  msg = ("GPAI model fails the transparency requirements under Article 53.")
124
 
125
 
126
- if gpai_model_systematic_risk:
127
- for key, value in project_cc_yaml['gpai_models_with_systemic_risk_obligations']:
128
 
129
 
130
- # if ai_system:
131
- # for key, value in project_cc_yaml['']:
132
  # TODO to be included in project_cc
133
-
134
 
135
- # TODO: No matter where we land with an orchestrator function, this function must also check to the value it has set for both
136
- # GPAI models with and without systemic risk and then check to see if the relevant requirement have met if either of these values applies.
137
- # This will look a lot like what is happening above for high-risk AI systems.
138
 
139
  return dispositive_variables
140
 
@@ -159,7 +177,6 @@ def run_compliance_analysis_on_data(dispositive_variables, data_cc_yaml):
159
  # if not value:
160
  # msg = (f"Because of the dataset represented by {filename}, this GPAI fails the transparency requirements under Article 53.")
161
 
162
-
163
  # TODO: No matter where we land with an orchestrator function, this function must also check to the value that has been set for both
164
  # GPAI models with and without systemic risk and then check to see if the relevant requirements have met if either of these values applies.
165
  # Right now it is only checking high-risk AI system requirements. Another thing that we likely have to add here is the cross-comparison of the
 
1
  import yaml
2
+ from utils import set_operator_role_and_location, set_eu_market_status, check_within_scope_cc, check_within_scope_act
3
 
4
  # Create some variables we will use throughout our analysis
5
 
 
31
 
32
  def check_overall_compliance(dispositive_variables, cc_files):
33
 
34
+
35
  # check intended purposes
36
+ # dispositive_variables = check_intended_purpose(dispositive_variables, cc_files)
37
 
38
  # for each model_cc and data_cc - run analysis with ref to project_cc
39
 
40
+ # dispositive_variables = run_compliance_analysis_on_data(dispositive_variables, data_cc_yaml)
41
+ # dispositive_variables = run_compliance_analysis_on_model(dispositive_variables, model_cc_yaml)
42
 
43
  dispositive_variables = run_compliance_analysis_on_project(dispositive_variables, project_cc_yaml)
44
 
45
  return dispositive_variables
46
 
47
+ def run_compliance_analysis_on_project(project_cc_yaml):
48
+
49
+ # Project Type
50
+ if project_cc_yaml['ai_system']['ai_system']['value']:
51
+ dispositive_variables['ai_project_type']['ai_system'] = True
52
+ if project_cc_yaml['gpai_model']['gpai_model']['value']:
53
+ dispositive_variables['ai_project_type']['gpai_model'] = True
54
+ if dispositive_variables['ai_project_type']['ai_system'] and dispositive_variables['ai_project_type']['gpai_model']:
55
+ dispositive_variables['msg'] = "Your project cannot be both an AI system and a GPAI model. Please revise your Project CC accordingly."
56
+ return dispositive_variables
57
+
58
+ if ai_system == True:
59
+ for key, value in project_cc_yaml['high_risk_ai_system']:
60
+ if value and sum(map(bool, [project_cc_yaml['high_risk_ai_system']['filter_exception_rights'],project_cc_yaml['high_risk_ai_system']['filter_exception_narrow'],project_cc_yaml['high_risk_ai_system']['filter_exception_human'],project_cc_yaml['high_risk_ai_system']['filter_exception_deviation'], project_cc_yaml['high_risk_ai_system']['filter_exception_prep']])) < 1:
61
+ project_type = "high_risk_ai_system"
62
+
63
+ if gpai_model == True:
64
+ if project_cc_yaml['gpai_model_systematic_risk']['evaluation'] or project_cc_yaml['gpai_model_systematic_risk']['flops']:
65
+ project_type = "gpai_model_systematic_risk"
66
+
67
+ # Operator Type
68
  set_operator_role_and_location(dispositive_variables, project_cc_yaml)
69
  set_eu_market_status(dispositive_variables, project_cc_yaml)
70
 
 
124
 
125
  if gpai_model:
126
 
127
+ # # If the project is a GPAI model with systematic risk, check that it has additionally met all the requirements for such systems:
128
 
129
+ # if gpai_model_systematic_risk:
130
 
131
+ # # Do this by examining the Project CC
132
 
133
+ # for key, value in project_cc_yaml['gpai_obligations_for_systemic_risk_models']:
134
+ # if not value:
135
+ # msg = ("GPAI model with systematic risk fails the transparency requirements under Article 55.")
136
 
137
+ # Do this by examining the Project CC
138
 
139
  for key, value in project_cc_yaml['gpai_model_obligations']:
140
  if not value:
141
  msg = ("GPAI model fails the transparency requirements under Article 53.")
142
 
143
 
144
+ # if gpai_model_systematic_risk:
145
+ # for key, value in project_cc_yaml['gpai_models_with_systemic_risk_obligations']:
146
 
147
 
148
+ # if ai_system:
149
+ # for key, value in project_cc_yaml['']:
150
  # TODO to be included in project_cc
151
+
152
 
153
+ # TODO: No matter where we land with an orchestrator function, this function must also check the value it has set for both
154
+ # GPAI models with and without systemic risk and then check to see if the relevant requirements have been met if either of these values applies.
155
+ # This will look a lot like what is happening above for high-risk AI systems.
156
 
157
  return dispositive_variables
158
 
 
177
  # if not value:
178
  # msg = (f"Because of the dataset represented by {filename}, this GPAI fails the transparency requirements under Article 53.")
179
 
 
180
  # TODO: No matter where we land with an orchestrator function, this function must also check the value that has been set for both
181
  # GPAI models with and without systemic risk and then check to see if the relevant requirements have been met if either of these values applies.
182
  # Right now it is only checking high-risk AI system requirements. Another thing that we likely have to add here is the cross-comparison of the
data_cc.yaml CHANGED
@@ -1,3 +1,5 @@
 
 
1
  intended_purpose:
2
  safety_component:
3
  article: 'Art. 6(1)(a)'
@@ -109,8 +111,8 @@ data_and_data_governance:
109
  article: 'Art. 10(3)'
110
  verbose: 'Training data possesses the appropriate statistical properties, including, where applicable, as regards the people in relation to whom the system is intended to be used'
111
  value: !!bool false
112
- contextual: 'Art. 10(4)'
113
- article:
114
  verbose: 'Training data takes into account, to the extent required by the intended purpose, the characteristics or elements that are particular to the specific geographical, contextual, behavioural or functional setting within which the system is intended to be used'
115
  value: !!bool false
116
  personal_data_necessary:
 
1
+ card_type: "data" # "project", "data" or "model"
2
+
3
  intended_purpose:
4
  safety_component:
5
  article: 'Art. 6(1)(a)'
 
111
  article: 'Art. 10(3)'
112
  verbose: 'Training data possesses the appropriate statistical properties, including, where applicable, as regards the people in relation to whom the system is intended to be used'
113
  value: !!bool false
114
+ contextual:
115
+ article: 'Art. 10(4)'
116
  verbose: 'Training data takes into account, to the extent required by the intended purpose, the characteristics or elements that are particular to the specific geographical, contextual, behavioural or functional setting within which the system is intended to be used'
117
  value: !!bool false
118
  personal_data_necessary:
model_cc.yaml CHANGED
@@ -1,3 +1,5 @@
 
 
1
  intended_purpose:
2
  safety_component:
3
  article: 'Art. 6(1)(a)'
 
1
+ card_type: "model" # "project", "data" or "model"
2
+
3
  intended_purpose:
4
  safety_component:
5
  article: 'Art. 6(1)(a)'
project_cc.yaml CHANGED
@@ -1,6 +1,7 @@
1
 
2
  # Information related to high-level characteristics of AI project, including the role of the operator, their location, and where the output is used
3
-
 
4
  operator_details:
5
  provider:
6
  article: 'Art. 2'
@@ -29,13 +30,13 @@ ai_system:
29
  ai_system:
30
  article: 'Art. 3(1)'
31
  verbose: 'AI project is a machine-based system that is designed to operate with varying levels of autonomy and that may exhibit adaptiveness after deployment, and that, for explicit or implicit objectives, infers, from the input it receives, how to generate outputs such as predictions, content, recommendations, or decisions that can influence physical or virtual environments'
32
- value: !!bool false
33
 
34
  gpai_model:
35
  gpai_model:
36
  article: 'Art. 3(63)'
37
  verbose: 'AI project is an AI model, including where such an AI model is trained with a large amount of data using self-supervision at scale, that displays significant generality and is capable of competently performing a wide range of distinct tasks regardless of the way the model is placed on the market and that can be integrated into a variety of downstream systems or applications, except AI models that are used for research, development or prototyping activities before they are placed on the market'
38
- value: !!bool false
39
 
40
  gpai_model_systematic_risk:
41
  evaluation:
 
1
 
2
  # Information related to high-level characteristics of AI project, including the role of the operator, their location, and where the output is used
3
+ card_type: "project" # "project", "data" or "model"
4
+
5
  operator_details:
6
  provider:
7
  article: 'Art. 2'
 
30
  ai_system:
31
  article: 'Art. 3(1)'
32
  verbose: 'AI project is a machine-based system that is designed to operate with varying levels of autonomy and that may exhibit adaptiveness after deployment, and that, for explicit or implicit objectives, infers, from the input it receives, how to generate outputs such as predictions, content, recommendations, or decisions that can influence physical or virtual environments'
33
+ value: !!bool true
34
 
35
  gpai_model:
36
  gpai_model:
37
  article: 'Art. 3(63)'
38
  verbose: 'AI project is an AI model, including where such an AI model is trained with a large amount of data using self-supervision at scale, that displays significant generality and is capable of competently performing a wide range of distinct tasks regardless of the way the model is placed on the market and that can be integrated into a variety of downstream systems or applications, except AI models that are used for research, development or prototyping activities before they are placed on the market'
39
+ value: !!bool true
40
 
41
  gpai_model_systematic_risk:
42
  evaluation:
run.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ import json
3
+ from pathlib import Path
4
+ import pandas as pd
5
+ from compliance_analysis import run_compliance_analysis_on_project, run_compliance_analysis_on_data, run_compliance_analysis_on_model
6
+
7
+ pd.set_option('display.max_columns', None)
8
+ pd.set_option('display.max_rows', None)
9
+
10
+ files = ["./project_cc.yaml", "./data_cc.yaml", "./data_cc.yaml", "./model_cc.yaml", "./model_cc.yaml", "./model_cc.yaml"]
11
+
12
+ # def load_data(files):
13
+ # cards = []
14
+ # for file in files:
15
+ # with open(file, 'r') as f:
16
+ # if Path(f.name).name == "project_cc.yaml":
17
+ # content = f.read()
18
+ # project_cc_yaml = yaml.safe_load(content)
19
+ # data = project_cc_yaml
20
+ # card_type = "project"
21
+ # cards.append((card_type, data))
22
+ # if Path(f.name).name == "data_cc.yaml":
23
+ # data_cc_yaml = yaml.safe_load(content)
24
+ # data = data_cc_yaml
25
+ # card_type = "data"
26
+ # cards.append((card_type, data))
27
+ # if Path(f.name).name == "model_cc.yaml":
28
+ # model_cc_yaml = yaml.safe_load(content)
29
+ # data = model_cc_yaml
30
+ # card_type = "model"
31
+ # cards.append((card_type, data))
32
+ # return cards
33
+
34
+ # cards = load_data(files)
35
+
36
+ def gather_cards(files):
37
+ cards = {}
38
+ cards['project_file'] = ''
39
+ cards['data_files'] = []
40
+ cards['model_files'] = []
41
+ for file in files:
42
+ with open(file, 'r') as f:
43
+ content = yaml.safe_load(f.read())
44
+ if content['card_type'] == "project":
45
+ cards['project_file'] = f.name
46
+ if content['card_type'] == "data":
47
+ cards['data_files'].append(f.name)
48
+ if content['card_type'] == "model":
49
+ cards['model_files'].append(f.name)
50
+ return cards
51
+
52
+ cards = gather_cards(files)
53
+ print(cards)
54
+
55
+ # def load_data(files):
56
+ # cards = []
57
+ # for file in files:
58
+ # with open(file, 'r') as f:
59
+ # if Path(f.name).name == "project_cc.yaml":
60
+ # content = f.read()
61
+ # project_cc_yaml = yaml.safe_load(content)
62
+ # data = project_cc_yaml
63
+ # card_type = "project"
64
+ # cards.append((card_type, data))
65
+ # if Path(f.name).name == "data_cc.yaml":
66
+ # data_cc_yaml = yaml.safe_load(content)
67
+ # data = data_cc_yaml
68
+ # card_type = "data"
69
+ # cards.append((card_type, data))
70
+ # if Path(f.name).name == "model_cc.yaml":
71
+ # model_cc_yaml = yaml.safe_load(content)
72
+ # data = model_cc_yaml
73
+ # card_type = "model"
74
+ # cards.append((card_type, data))
75
+ # return cards
76
+
77
+ # def process_files(data):
78
+ # results = []
79
+ # dispositive_variables = check_overall_compliance(yaml.safe_load(data))
80
+ # results.append(dispositive_variables['msg'])
81
+ # return results
82
+
83
+ # for card in cards:
84
+ # data = card[1]
85
+ # yaml_data = yaml.dump(data, sort_keys=False)
86
+ # process_files(yaml_data)
87
+ # print(process_files(yaml_data))
88
+
utils.py CHANGED
@@ -3,30 +3,6 @@ import yaml
3
  # We could probably combine set_type, set_operator_role_and_location, and set_eu_market_status into a single function that sets all project_variables
4
  # We will have to add a couple other things to that function as well
5
 
6
- def set_type(dispositive_variables, project_cc_yaml):
7
-
8
- project_type = None
9
-
10
- ai_system = dispositive_variables['ai_project_type']['ai_system']
11
- gpai_model = dispositive_variables['ai_project_type']['gpai_model']
12
-
13
- if project_cc_yaml['ai_system']['ai_system']['value']:
14
- ai_system = True
15
- if project_cc_yaml['gpai_model']['gpai_model']['value']:
16
- gpai_model = True
17
- if ai_system and gpai_model:
18
- msg = ("Your project cannot be both an AI system and a GPAI model. Please revise your Project CC accordingly.")
19
- if ai_system == True:
20
- for key, value in project_cc_yaml['high_risk_ai_system']:
21
- if value and sum(map(bool, [project_cc_yaml['high_risk_ai_system']['filter_exception_rights'],project_cc_yaml['high_risk_ai_system']['filter_exception_narrow'],project_cc_yaml['high_risk_ai_system']['filter_exception_human'],project_cc_yaml['high_risk_ai_system']['filter_exception_deviation'], project_cc_yaml['high_risk_ai_system']['filter_exception_prep']])) < 1:
22
- project_type = "high_risk_ai_system"
23
-
24
- if gpai_model == True:
25
- if project_cc_yaml['gpai_model_systematic_risk']['evaluation'] or project_cc_yaml['gpai_model_systematic_risk']['flops']:
26
- project_type = "gpai_model_systematic_risk"
27
-
28
- return project_type
29
-
30
  def set_operator_role_and_location(dispositive_variables, project_cc_yaml):
31
  operators = 0
32
 
 
3
  # We could probably combine set_type, set_operator_role_and_location, and set_eu_market_status into a single function that sets all project_variables
4
  # We will have to add a couple other things to that function as well
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  def set_operator_role_and_location(dispositive_variables, project_cc_yaml):
7
  operators = 0
8